[llvm] [AMDGPU][True16][MC] true16 for VOPC instructions (PR #111156)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 9 10:20:24 PDT 2024
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/111156
>From f34e0a9b4a27a22c436a2aed0eaa6cecbfd0caf2 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Tue, 17 Sep 2024 15:06:04 -0400
Subject: [PATCH 1/2] [AMDGPU][True16][MC] VOP3 profile in True16 format
---
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 6 +-
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 4 +-
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 90 ++--
llvm/lib/Target/AMDGPU/VOPInstructions.td | 308 +++++++++--
.../AMDGPU/GlobalISel/inst-select-add.s16.mir | 40 +-
.../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 32 +-
.../isel-amdgpu-cs-chain-preserve-cc.ll | 6 +-
llvm/test/MC/AMDGPU/gfx11_asm_opsel.s | 48 ++
llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 260 ++++++----
llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s | 240 ++++-----
llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 172 +++---
llvm/test/MC/AMDGPU/gfx12_asm_opsel.s | 48 ++
llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 156 +++---
llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s | 328 +++++++-----
llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 176 ++++---
.../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 444 ++++++++++++----
.../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 376 ++++++++++++--
.../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 196 ++++++-
.../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 384 +++++++++++---
.../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 488 ++++++++++++++++--
.../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 308 ++++++++++-
21 files changed, 3143 insertions(+), 967 deletions(-)
create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_opsel.s
create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_opsel.s
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 087ca1f954464d..be42d3595bea61 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2237,8 +2237,9 @@ class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
string clamp = !if(HasClamp, "$clamp", "");
string omod = !if(HasOMod, "$omod", "");
- string ret = dst#!if(!gt(NumSrcArgs,0),", "#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod, "");
-
+ string ret = dst#!if(!eq(NumSrcArgs,0),
+ "",
+ !if(HasDst,", ", "")#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod);
}
class getAsmVOP3DPP<string base> {
@@ -2733,6 +2734,7 @@ def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
+def VOP_I32_I32_I32_I16 : VOPProfile <[i32, i32, i32, i16]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index cdc32149249610..f411ea277bbe23 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1661,8 +1661,8 @@ multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;
multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> :
- VOP3Only_Realtriple_t16<GFX11Gen, op, asmName, OpName>,
- VOP3Only_Realtriple_t16<GFX12Gen, op, asmName, OpName>;
+ VOP3_Realtriple_t16_gfx11<op, asmName, OpName, "", 1>,
+ VOP3_Realtriple_t16_gfx12<op, asmName, OpName, "", 1>;
multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> {
defm OpName#"_t16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 78ca7a2f258cb3..fe37b56bcbf862 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -569,16 +569,10 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
getAsmVOP3OpSel<3, HasClamp, HasOMod,
HasSrc0FloatMods, HasSrc1FloatMods,
HasSrc2FloatMods>.ret);
- let AsmVOP3DPP16 = !subst(", $src2_modifiers", "",
- getAsmVOP3DPP16<getAsmVOP3Base<3, 1, HasClamp, 1,
- HasOMod, 0, 1, HasSrc0FloatMods,
- HasSrc1FloatMods,
- HasSrc2FloatMods>.ret>.ret);
- let AsmVOP3DPP8 = !subst(", $src2_modifiers", "",
- getAsmVOP3DPP8<getAsmVOP3Base<3, 1, HasClamp, 1,
- HasOMod, 0, 1, HasSrc0FloatMods,
- HasSrc1FloatMods,
- HasSrc2FloatMods>.ret>.ret);
+ let AsmVOP3Base = !subst(", $src2_modifiers", "",
+ getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
+ HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, 0/*Src1Mods*/,
+ HasModifiers, DstVT>.ret);
}
class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
@@ -636,8 +630,8 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
-defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
-defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
+defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
+defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
@@ -752,6 +746,8 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
(i32 (EXTRACT_SUBREG $src1, sub1)),
(i32 (EXTRACT_SUBREG $src2, sub1))), sub1)>;
+} // End SubtargetPredicate = isGFX9Plus
+
// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
class OpSelBinOpClampPat<SDPatternOperator node,
Instruction inst> : GCNPat<
@@ -760,9 +756,14 @@ class OpSelBinOpClampPat<SDPatternOperator node,
(inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE, 0)
>;
-def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
-def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
-} // End SubtargetPredicate = isGFX9Plus
+let OtherPredicates = [isGFX9Plus], True16Predicate = NotHasTrue16BitInsts in {
+ def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
+ def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
+} // End OtherPredicates = [isGFX9Plus], True16Predicate = NotHasTrue16BitInsts
+let True16Predicate = UseFakeTrue16Insts in {
+ def : OpSelBinOpClampPat<saddsat, V_ADD_I16_fake16_e64>;
+ def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_fake16_e64>;
+} // End True16Predicate = UseFakeTrue16Insts
multiclass IMAD32_Pats <VOP3_Pseudo inst> {
def : GCNPat <
@@ -871,21 +872,31 @@ let SubtargetPredicate = isGFX10Plus in {
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
}
- defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
- defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
-
- def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
- def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
-
- // Undo sub x, c -> add x, -c canonicalization since c is more likely
- // an inline immediate than -c.
- def : GCNPat<
- (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
- (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
- >;
+ defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>;
+ defm V_SUB_NC_U16 : VOP3Inst_t16 <"v_sub_nc_u16", VOP_I16_I16_I16, sub>;
} // End SubtargetPredicate = isGFX10Plus
+let True16Predicate = NotHasTrue16BitInsts, OtherPredicates = [isGFX10Plus] in {
+ def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
+ def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
+ // Undo sub x, c -> add x, -c canonicalization since c is more likely
+ // an inline immediate than -c.
+ def : GCNPat<
+ (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
+ (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
+ >;
+} // End True16Predicate = NotHasTrue16BitInsts, OtherPredicates = [isGFX10Plus]
+
+let True16Predicate = UseFakeTrue16Insts in {
+ def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_fake16_e64>;
+ def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_fake16_e64>;
+ def : GCNPat<
+ (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
+ (V_SUB_NC_U16_fake16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
+ >;
+} // End True16Predicate = UseFakeTrue16Insts
+
let SubtargetPredicate = isGFX12Plus in {
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
defm V_PERMLANE16_VAR_B32 : VOP3Inst<"v_permlane16_var_b32", VOP3_PERMLANE_VAR_Profile>;
@@ -1101,9 +1112,26 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
+multiclass VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string opName,
+ string asmName> {
+ defm opName#"_t16": VOP3_Realtriple_with_name_gfx11_gfx12<op, opName#"_t16", asmName>;
+ defm opName#"_fake16": VOP3_Realtriple_with_name_gfx11_gfx12<op, opName#"_fake16", asmName>;
+}
+
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
+multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
+ string pseudo_mnemonic = "", bit isSingle = 0> :
+ VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
+ VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
+
+multiclass VOP3_Realtriple_t16_and_f16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
+ string pseudo_mnemonic = "", bit isSingle = 0> {
+ defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
+ defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
+}
+
multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
VOP3be_Real<GFX11Gen, op, opName, asmName>,
VOP3be_Real<GFX12Gen, op, opName, asmName>;
@@ -1189,8 +1217,8 @@ defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
-defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x303>;
-defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x304>;
+defm V_ADD_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x303, "v_add_nc_u16">;
+defm V_SUB_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x304, "v_sub_nc_u16">;
defm V_MUL_LO_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x305, "v_mul_lo_u16">;
defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11_gfx12<0x306>;
defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11_gfx12<0x307>;
@@ -1198,8 +1226,8 @@ defm V_MAX_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30
defm V_MAX_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30a, "v_max_i16">;
defm V_MIN_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30b, "v_min_u16">;
defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">;
-defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30d, "V_ADD_I16", "v_add_nc_i16">;
-defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
+defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "V_ADD_I16", "v_add_nc_i16">;
+defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 05a7d907d237ae..0c21ef29cc3c74 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -111,7 +111,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
bit HasFP8DstByteSel = P.HasFP8DstByteSel;
- let AsmOperands = !if(isVop3OpSel,
+ let AsmOperands = !if(!and(!not(P.IsTrue16), isVop3OpSel),
P.AsmVOP3OpSel,
!if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64));
@@ -178,6 +178,7 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemoni
let SubtargetPredicate = ps.SubtargetPredicate;
let WaveSizePredicate = ps.WaveSizePredicate;
let OtherPredicates = ps.OtherPredicates;
+ let True16Predicate = ps.True16Predicate;
let AsmMatchConverter = ps.AsmMatchConverter;
let AsmVariantName = ps.AsmVariantName;
let Constraints = ps.Constraints;
@@ -242,6 +243,41 @@ class VOP3a<VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}
+// To avoid having different version of every type of operand depending on if
+// they are part of a True16 instruction or not, the operand encoding should be
+// the same for SGPR, imm, and VGPR_32 whether the instruction is True16 or not.
+class VOP3a_t16<VOPProfile P> : Enc64 {
+ bits<11> vdst;
+ bits<4> src0_modifiers;
+ bits<11> src0;
+ bits<3> src1_modifiers;
+ bits<11> src1;
+ bits<3> src2_modifiers;
+ bits<11> src2;
+ bits<1> clamp;
+ bits<2> omod;
+
+ let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0);
+ let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0);
+ let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);
+ // 16-bit select fields which can be interpreted as OpSel or hi/lo suffix
+ let Inst{11} = !if(P.HasSrc0Mods, src0_modifiers{2}, 0);
+ let Inst{12} = !if(P.HasSrc1Mods, src1_modifiers{2}, 0);
+ let Inst{13} = !if(P.HasSrc2Mods, src2_modifiers{2}, 0);
+ let Inst{14} = !if(!and(P.HasDst, P.HasSrc0Mods), src0_modifiers{3}, 0);
+ let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
+
+ let Inst{31-26} = 0x35;
+ let Inst{40-32} = !if(P.HasSrc0, src0{8-0}, 0);
+ let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2{8-0}, 0);
+ let Inst{60-59} = !if(P.HasOMod, omod, 0);
+ let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0);
+ let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0);
+ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
+}
+
class VOP3a_gfx6_gfx7<bits<9> op, VOPProfile p> : VOP3a<p> {
let Inst{11} = !if(p.HasClamp, clamp{0}, 0);
let Inst{25-17} = op;
@@ -272,6 +308,10 @@ class VOP3e_gfx10<bits<10> op, VOPProfile p> : VOP3a_gfx10<op, p> {
class VOP3e_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p>;
+class VOP3e_t16_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3a_t16<p> {
+ let Inst{25-16} = op;
+}
+
class VOP3e_vi <bits<10> op, VOPProfile P> : VOP3a_vi <op, P> {
bits<8> vdst;
let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
@@ -736,7 +776,12 @@ class VOP3_DPPe_Fields : VOP3_DPPe_Fields_Base {
bits<8> src0;
}
+class VOP3_DPPe_Fields_t16 : VOP3_DPPe_Fields_Base {
+ bits<11> src0;
+}
+
// Common refers to common between DPP and DPP8
+// Base refers to a shared base between T16 and regular instructions
class VOP3_DPPe_Common_Base<bits<10> op, VOPProfile P> : Enc96 {
bits<4> src0_modifiers;
bits<3> src1_modifiers;
@@ -748,7 +793,7 @@ class VOP3_DPPe_Common_Base<bits<10> op, VOPProfile P> : Enc96 {
let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0);
let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0);
let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);
- // OPSEL must be set such that the low result only uses low inputs, and the high result only uses high inputs.
+ // 16-bit select fields which can be interpreted as OpSel or hi/lo suffix
let Inst{11} = !if(P.HasOpSel, !if(P.HasSrc0Mods, src0_modifiers{2}, 0),
!if(P.IsFP8SrcByteSel, byte_sel{1}, ?));
let Inst{12} = !if(P.HasOpSel, !if(P.HasSrc1Mods, src1_modifiers{2}, 0),
@@ -777,6 +822,16 @@ class VOP3_DPPe_Common<bits<10> op, VOPProfile P> : VOP3_DPPe_Common_Base<op, P>
let Inst{58-50} = !if(P.HasSrc2, src2, 0);
}
+class VOP3_DPPe_Common_t16<bits<10> op, VOPProfile P> : VOP3_DPPe_Common_Base<op, P> {
+ bits<11> vdst;
+ bits<11> src1;
+ bits<11> src2;
+
+ let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2{8-0}, 0);
+}
+
class VOP3P_DPPe_Common_Base<bits<7> op, VOPProfile P> : Enc96 {
bits<4> src0_modifiers;
bits<4> src1_modifiers;
@@ -786,6 +841,7 @@ class VOP3P_DPPe_Common_Base<bits<7> op, VOPProfile P> : Enc96 {
let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
+ // OPSEL must be set such that the low result only uses low inputs, and the high result only uses high inputs.
let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2}, 0); // op_sel(0)
let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1)
let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2)
@@ -810,6 +866,16 @@ class VOP3P_DPPe_Common<bits<7> op, VOPProfile P> : VOP3P_DPPe_Common_Base<op, P
let Inst{58-50} = !if(P.HasSrc2, src2, 0);
}
+class VOP3P_DPPe_Common_t16<bits<7> op, VOPProfile P> : VOP3P_DPPe_Common_Base<op, P> {
+ bits<11> vdst;
+ bits<11> src1;
+ bits<11> src2;
+
+ let Inst{7-0} = vdst{7-0};
+ let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2{8-0}, 0);
+}
+
class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[],
dag Ins = P.InsDPP, string asmOps = P.AsmDPP> :
VOP_Pseudo<OpName, "_dpp", P, P.OutsDPP, Ins, asmOps, pattern> {
@@ -870,6 +936,7 @@ class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
// Copy relevant pseudo op flags
let isConvergent = ps.isConvergent;
let SubtargetPredicate = ps.SubtargetPredicate;
+ let True16Predicate = ps.True16Predicate;
let AssemblerPredicate = ps.AssemblerPredicate;
let OtherPredicates = ps.OtherPredicates;
let AsmMatchConverter = ps.AsmMatchConverter;
@@ -928,11 +995,29 @@ class VOP3_DPP_Base <string OpName, VOPProfile P, bit IsDPP16,
let Size = 12;
}
+class VOP3_DPP_Enc <bits<10> op, VOPProfile P, bit IsDPP16> :
+ VOP3_DPPe_Common<op, P>,
+ VOP3_DPPe_Fields {
+
+ let Inst{40-32} = 0xfa;
+ let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{80-72} = dpp_ctrl;
+ let Inst{82} = !if(IsDPP16, fi, ?);
+ let Inst{83} = bound_ctrl;
+
+ // Inst{87-84} ignored by hw
+ let Inst{91-88} = bank_mask;
+ let Inst{95-92} = row_mask;
+}
+
class VOP3_DPP <bits<10> op, string OpName, VOPProfile P, bit IsDPP16,
dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> :
- VOP3_DPP_Base<OpName, P, IsDPP16, InsDPP, AsmDPP>, VOP3_DPPe_Common<op, P>,
- VOP3_DPPe_Fields {
+ VOP3_DPP_Base<OpName, P, IsDPP16, InsDPP, AsmDPP>, VOP3_DPP_Enc<op, P, IsDPP16>;
+
+class VOP3_DPP_Enc_t16<bits<10> op, VOPProfile P, bit IsDPP16 >
+ : VOP3_DPPe_Common_t16<op, P>,
+ VOP3_DPPe_Fields_t16 {
let Inst{40-32} = 0xfa;
let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
@@ -945,6 +1030,13 @@ class VOP3_DPP <bits<10> op, string OpName, VOPProfile P, bit IsDPP16,
let Inst{95-92} = row_mask;
}
+class VOP3_DPP_t16<bits<10> op, string OpName, VOPProfile P, bit IsDPP16,
+ dag InsDPP = !if (IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
+ string AsmDPP = !if (IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)>
+ : VOP3_DPP_Base<OpName, P, IsDPP16, InsDPP, AsmDPP>,
+ VOP3_DPP_Enc_t16<op, P, IsDPP16> {
+}
+
class VOP3P_DPP <bits<7> op, string OpName, VOPProfile P, bit IsDPP16,
dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> :
@@ -979,6 +1071,12 @@ class VOP3_DPP8e_Fields {
bits<9> fi;
}
+class VOP3_DPP8e_Fields_t16 {
+ bits<11> src0;
+ bits<24> dpp8;
+ bits<9> fi;
+}
+
class VOP_DPP8_Base<string OpName, VOPProfile P, dag InsDPP8 = P.InsDPP8, string AsmDPP8 = P.AsmDPP8> :
InstSI<P.OutsDPP8, InsDPP8, OpName#AsmDPP8, []> {
@@ -1011,16 +1109,28 @@ class VOP3_DPP8_Base<string OpName, VOPProfile P> :
let Size = 12;
}
+class VOP3_DPP8_Enc <bits<10> op, VOPProfile P> :
+ VOP3_DPPe_Common<op, P>,
+ VOP3_DPP8e_Fields {
+ let Inst{40-32} = fi;
+ let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{95-72} = dpp8{23-0};
+}
class VOP3_DPP8<bits<10> op, string OpName, VOPProfile P> :
- VOP3_DPP8_Base<OpName, P>, VOP3_DPPe_Common<op, P>,
- VOP3_DPP8e_Fields {
+ VOP3_DPP8_Base<OpName, P>, VOP3_DPP8_Enc<op, P>;
+class VOP3_DPP8_Enc_t16 <bits<10> op, VOPProfile P> :
+ VOP3_DPPe_Common_t16<op, P>,
+ VOP3_DPP8e_Fields_t16 {
let Inst{40-32} = fi;
let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
let Inst{95-72} = dpp8{23-0};
}
+class VOP3_DPP8_t16<bits<10> op, string OpName, VOPProfile P> :
+ VOP3_DPP8_Base<OpName, P>, VOP3_DPP8_Enc_t16<op, P>;
+
class VOP3P_DPP8<bits<7> op, string OpName, VOPProfile P> :
VOP3_DPP8_Base<OpName, P>, VOP3P_DPPe_Common<op, P>,
VOP3_DPP8e_Fields {
@@ -1273,6 +1383,32 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Pr
}
+class VOP3_Profile_True16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile_True16<P> {
+
+ let HasClamp = !if(Features.HasClamp, 1, P.HasClamp);
+ let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel);
+ let IsMAI = !if(Features.IsMAI, 1, P.IsMAI);
+ let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
+
+ let HasModifiers =
+ !if (Features.IsMAI, 0,
+ !or(Features.IsPacked, Features.HasOpSel, P.HasModifiers));
+ let IsSingle = 1;
+}
+
+class VOP3_Profile_Fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile_Fake16<P> {
+
+ let HasClamp = !if(Features.HasClamp, 1, P.HasClamp);
+ let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel);
+ let IsMAI = !if(Features.IsMAI, 1, P.IsMAI);
+ let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
+
+ let HasModifiers =
+ !if (Features.IsMAI, 0,
+ !or(Features.IsPacked, Features.HasOpSel, P.HasModifiers));
+ let IsSingle = 1;
+}
+
// consistently gives instructions a _e64 suffix
multiclass VOP3Inst_Pseudo_Wrapper<string opName, VOPProfile P, list<dag> pattern = [], bit VOP3Only = 0> {
def _e64 : VOP3_Pseudo<opName, P, pattern, VOP3Only>;
@@ -1325,6 +1461,28 @@ multiclass VOP3PseudoScalarInst<string OpName, VOPProfile P,
i32:$omod))))]>;
}
+multiclass VOP3Inst_t16_with_profiles<string OpName, VOPProfile P, VOPProfile P_t16,
+ VOPProfile P_fake16,
+ SDPatternOperator node = null_frag,
+ SDPatternOperator node_t16 = node> {
+ let True16Predicate = NotHasTrue16BitInsts in {
+ defm NAME : VOP3Inst<OpName, P, node>;
+ }
+ let True16Predicate = UseRealTrue16Insts in {
+ defm _t16 : VOP3Inst<OpName # "_t16", P_t16, node_t16>;
+ }
+ let True16Predicate = UseFakeTrue16Insts in {
+ defm _fake16 : VOP3Inst<OpName # "_fake16", P_fake16, node>;
+ }
+}
+
+multiclass VOP3Inst_t16<string OpName, VOPProfile P,
+ SDPatternOperator node = null_frag,
+ SDPatternOperator node_t16 = node>
+ : VOP3Inst_t16_with_profiles<OpName, VOP3_Profile<P, VOP3_OPSEL>,
+ VOP3_Profile_True16<P, VOP3_OPSEL>, VOP3_Profile_Fake16<P, VOP3_OPSEL>,
+ node, node_t16>;
+
//===----------------------------------------------------------------------===//
// VOP3 DPP
//===----------------------------------------------------------------------===//
@@ -1342,17 +1500,43 @@ class Base_VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
let OtherPredicates = ps.OtherPredicates;
}
+class Base_VOP3_DPP16_t16<bits<10> op, VOP_DPP_Pseudo ps,
+ string opName = ps.OpName>
+ : VOP3_DPP_t16<op, opName, ps.Pfl, 1> {
+ let VOP3_OPSEL = ps.Pfl.HasOpSel;
+ let IsDOT = ps.IsDOT;
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+ let AssemblerPredicate = HasDPP16;
+ let SubtargetPredicate = HasDPP16;
+ let OtherPredicates = ps.OtherPredicates;
+}
+
class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget,
string opName = ps.OpName>
: Base_VOP3_DPP16<op, ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>;
+class VOP3_DPP16_t16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget,
+ string opName = ps.OpName>
+ : Base_VOP3_DPP16_t16<op, ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>;
+
class VOP3_DPP16_Gen<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen,
- string opName = ps.OpName> :
- VOP3_DPP16 <op, ps, Gen.Subtarget, opName> {
+ string opName = ps.OpName>
+ : VOP3_DPP16 <op, ps, Gen.Subtarget, opName> {
let AssemblerPredicate = Gen.AssemblerPredicate;
- let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
- let DecoderNamespace = Gen.DecoderNamespace#
- !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
+ let DecoderNamespace = Gen.DecoderNamespace;
+}
+
+class VOP3_DPP16_Gen_t16<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen,
+ string opName = ps.OpName>
+ : VOP3_DPP16_t16<op, ps, Gen.Subtarget, opName> {
+ let True16Predicate =
+ !if (ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let DecoderNamespace =
+ Gen.DecoderNamespace #!if (ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
class Base_VOP3_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
@@ -1366,6 +1550,20 @@ class Base_VOP3_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
let SubtargetPredicate = ps.SubtargetPredicate;
let OtherPredicates = ps.OtherPredicates;
+ let True16Predicate = ps.True16Predicate;
+}
+
+class Base_VOP3_DPP8_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
+ : VOP3_DPP8_t16<op, opName, ps.Pfl> {
+ let VOP3_OPSEL = ps.Pfl.HasOpSel;
+ let IsDOT = ps.IsDOT;
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+
+ let OtherPredicates = ps.OtherPredicates;
+ let True16Predicate = ps.True16Predicate;
}
class Base_VOP3b_DPP16<bits<10> op, VOP_DPP_Pseudo ps,
@@ -1381,6 +1579,12 @@ class VOP3b_DPP8_Base<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
let Inst{14 - 8} = sdst;
}
+class VOP3b_DPP8_Base_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
+ : Base_VOP3_DPP8<op, ps, opName> {
+ bits<8> sdst;
+ let Inst{14 - 8} = sdst{7-1};
+}
+
//===----------------------------------------------------------------------===//
// VOP3 GFX11, GFX12
//===----------------------------------------------------------------------===//
@@ -1420,10 +1624,11 @@ multiclass VOP3Dot_Real_Base<GFXGen Gen, bits<10> op, string opName = NAME,
}
multiclass VOP3_Real_with_name<GFXGen Gen, bits<10> op, string opName,
- string asmName, bit isSingle = 0> {
+ string asmName, string pseudo_mnemonic = "", bit isSingle = 0> {
defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
let AsmString = asmName # ps.AsmOperands,
IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
+ // FIXME-TRUE16 support FP8 instructions properly
if ps.Pfl.IsFP8SrcByteSel then {
def _e64#Gen.Suffix :
VOP3_Real_Gen<ps, Gen>,
@@ -1432,17 +1637,27 @@ multiclass VOP3_Real_with_name<GFXGen Gen, bits<10> op, string opName,
def _e64#Gen.Suffix :
VOP3_Real_Gen<ps, Gen>,
VOP3FP8OpSel_dst_bytesel_gfx11_gfx12<op, ps.Pfl>;
- } else if ps.Pfl.HasOpSel then {
- def _e64#Gen.Suffix :
- VOP3_Real_Gen<ps, Gen>,
- VOP3OpSel_gfx11_gfx12<op, ps.Pfl>;
} else {
- def _e64#Gen.Suffix :
- VOP3_Real_Gen<ps, Gen>,
- VOP3e_gfx11_gfx12<op, ps.Pfl>;
+ if ps.Pfl.IsRealTrue16 then {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3e_t16_gfx11_gfx12<op, ps.Pfl>;
+ } else {
+ if ps.Pfl.HasOpSel then {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3OpSel_gfx11_gfx12<op, ps.Pfl>;
+ } else {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3e_gfx11_gfx12<op, ps.Pfl>;
+ }
+ }
}
}
- def Gen.Suffix#"_VOP3_alias" : LetDummies, AMDGPUMnemonicAlias<ps.Mnemonic, asmName> {
+ def Gen.Suffix#"_VOP3_alias" : LetDummies,
+ AMDGPUMnemonicAlias<!if(!empty(pseudo_mnemonic),
+ ps.Mnemonic, pseudo_mnemonic), asmName, ""> {
let AssemblerPredicate = Gen.AssemblerPredicate;
}
}
@@ -1456,8 +1671,13 @@ multiclass VOP3_Real_No_Suffix<GFXGen Gen, bits<10> op, string opName = NAME> {
}
multiclass VOP3_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
- def _e64_dpp#Gen.Suffix :
- VOP3_DPP16_Gen<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), Gen>;
+ defvar ps = !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp");
+ if ps.Pfl.IsTrue16 then
+ def _e64_dpp#Gen.Suffix :
+ VOP3_DPP16_Gen_t16<op, ps, Gen>;
+ else
+ def _e64_dpp#Gen.Suffix :
+ VOP3_DPP16_Gen<op, ps, Gen>;
}
multiclass VOP3Dot_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
@@ -1552,18 +1772,14 @@ multiclass VOP3Only_Realtriple<GFXGen Gen, bits<10> op> :
VOP3_Realtriple<Gen, op, 1>;
multiclass VOP3_Realtriple_with_name<GFXGen Gen, bits<10> op, string opName,
- string asmName, bit isSingle = 0> :
- VOP3_Real_with_name<Gen, op, opName, asmName, isSingle>,
+ string asmName, string pseudo_mnemonic = "", bit isSingle = 0> :
+ VOP3_Real_with_name<Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
VOP3_Real_dpp_with_name<Gen, op, opName, asmName>,
VOP3_Real_dpp8_with_name<Gen, op, opName, asmName>;
multiclass VOP3Only_Realtriple_with_name<GFXGen Gen, bits<10> op, string opName,
string asmName> :
- VOP3_Realtriple_with_name<Gen, op, opName, asmName, 1>;
-
-multiclass VOP3Only_Realtriple_t16<GFXGen Gen, bits<10> op, string asmName,
- string opName = NAME>
- : VOP3Only_Realtriple_with_name<Gen, op, opName, asmName>;
+ VOP3_Realtriple_with_name<Gen, op, opName, asmName, "", 1>;
multiclass VOP3be_Realtriple<
GFXGen Gen, bits<10> op, bit isSingle = 0, string opName = NAME,
@@ -1579,6 +1795,16 @@ multiclass VOP3beOnly_Realtriple<GFXGen Gen, bits<10> op> :
// VOP3 GFX11
//===----------------------------------------------------------------------===//
+// VOP1 and VOP2 depend on these triple defs
+
+multiclass VOP3_Realtriple_t16_gfx11<bits<10> op, string asmName, string opName = NAME,
+ string pseudo_mnemonic = "", bit isSingle = 0> :
+ VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
+
+multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName,
+ string opName = NAME, string pseudo_mnemonic = "">
+ : VOP3_Realtriple_t16_gfx11<op, asmName, opName, pseudo_mnemonic, 1>;
+
multiclass VOP3be_Real_gfx11<bits<10> op, string opName, string asmName,
bit isSingle = 0> :
VOP3be_Real<GFX11Gen, op, opName, asmName, isSingle>;
@@ -1591,10 +1817,6 @@ multiclass VOP3_Realtriple_gfx11<bits<10> op, bit isSingle = 0,
string opName = NAME> :
VOP3_Realtriple<GFX11Gen, op, isSingle, opName>;
-multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName,
- string opName = NAME>
- : VOP3Only_Realtriple_with_name<GFX11Gen, op, opName, asmName>;
-
//===----------------------------------------------------------------------===//
// VOP3 GFX12
//===----------------------------------------------------------------------===//
@@ -1610,6 +1832,16 @@ multiclass VOP3Only_Real_Base_gfx12<bits<10> op> :
multiclass VOP3Only_Realtriple_t16_gfx12<bits<10> op> :
VOP3Only_Realtriple<GFX12Gen, op>;
+multiclass VOP3_Realtriple_t16_gfx12<bits<10> op, string asmName, string opName = NAME,
+ string pseudo_mnemonic = "", bit isSingle = 0> :
+ VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
+
+multiclass VOP3_Realtriple_t16_and_f16_gfx12<bits<10> op, string asmName, string opName = NAME,
+ string pseudo_mnemonic = "", bit isSingle = 0> {
+ defm opName#"_t16":VOP3_Realtriple_t16_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
+ defm opName#"_fake16":VOP3_Realtriple_t16_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
+}
+
multiclass VOP3be_Real_with_name_gfx12<bits<10> op, string opName,
string asmName, bit isSingle = 0> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
@@ -1624,18 +1856,14 @@ multiclass VOP3be_Real_with_name_gfx12<bits<10> op, string opName,
}
multiclass VOP3_Realtriple_with_name_gfx12<bits<10> op, string opName,
- string asmName, bit isSingle = 0> :
- VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, isSingle>;
+ string asmName, string pseudo_mnemonic = "", bit isSingle = 0> :
+ VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
multiclass VOP3Only_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
string asmName> :
VOP3Only_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
VOP3Only_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
-multiclass VOP3Only_Realtriple_with_name_t16_gfx12<bits<10> op, string asmName,
- string opName = NAME>
- : VOP3Only_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
-
//===----------------------------------------------------------------------===//
include "VOPCInstructions.td"
@@ -1705,4 +1933,4 @@ def VOPTrue16Table : GenericTable {
let PrimaryKey = ["Opcode"];
let PrimaryKeyName = "getTrue16OpcodeHelper";
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir
index 11411c691c3901..1971cd80d5686b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
# Note: 16-bit instructions generally produce a 0 result in the high 16-bits on GFX8 and GFX9 and preserve high 16 bits on GFX10+
@@ -23,6 +23,7 @@ body: |
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]]
+ ;
; GFX10-LABEL: name: add_s16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10-NEXT: {{ $}}
@@ -30,6 +31,14 @@ body: |
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX10-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ADD_NC_U16_e64_]]
+ ;
+ ; GFX11-LABEL: name: add_s16
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_ADD_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ADD_NC_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -56,6 +65,7 @@ body: |
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]]
+ ;
; GFX10-LABEL: name: add_s16_zext_to_s32
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10-NEXT: {{ $}}
@@ -65,6 +75,16 @@ body: |
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ADD_NC_U16_e64_]], implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
+ ;
+ ; GFX11-LABEL: name: add_s16_zext_to_s32
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_ADD_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ADD_NC_U16_fake16_e64_]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -91,12 +111,20 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec
; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]]
+ ;
; GFX10-LABEL: name: add_s16_neg_inline_const_64
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[V_SUB_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_SUB_NC_U16_e64_]]
+ ;
+ ; GFX11-LABEL: name: add_s16_neg_inline_const_64
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[V_SUB_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_fake16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_SUB_NC_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_CONSTANT i16 -64
@@ -121,6 +149,7 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec
; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]]
+ ;
; GFX10-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -129,6 +158,15 @@ body: |
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_SUB_NC_U16_e64_]], implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
+ ;
+ ; GFX11-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[V_SUB_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_fake16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_SUB_NC_U16_fake16_e64_]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_CONSTANT i16 -64
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 1151bde02ef62c..41b61f2e09a3d3 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -82,9 +82,9 @@ body: |
# Regression test for src_modifiers on base u16 opcode
# GCN-LABEL: name: vop3_u16
-# GCN: %5:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec
-# GCN: %7:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 1, %5, 2, %5, 0, 0, 1, 15, 15, 1, implicit $exec
-# GCN: %9:vgpr_32 = V_ADD_NC_U16_e64 4, %8, 8, %7, 0, 0, implicit $exec
+# GCN: %5:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec
+# GCN: %7:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %3, 1, %5, 2, %5, 0, 0, 1, 15, 15, 1, implicit $exec
+# GCN: %9:vgpr_32 = V_ADD_NC_U16_fake16_e64 4, %8, 8, %7, 0, 0, implicit $exec
name: vop3_u16
tracksRegLiveness: true
body: |
@@ -96,11 +96,11 @@ body: |
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
%4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %5:vgpr_32 = V_ADD_NC_U16_e64 0, %4, 0, %3, 0, 0, implicit $exec
+ %5:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, %4, 0, %3, 0, 0, implicit $exec
%6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
- %7:vgpr_32 = V_ADD_NC_U16_e64 1, %6, 2, %5, 0, 0, implicit $exec
+ %7:vgpr_32 = V_ADD_NC_U16_fake16_e64 1, %6, 2, %5, 0, 0, implicit $exec
%8:vgpr_32 = V_MOV_B32_dpp %3, %7, 1, 15, 15, 1, implicit $exec
- %9:vgpr_32 = V_ADD_NC_U16_e64 4, %8, 8, %7, 0, 0, implicit $exec
+ %9:vgpr_32 = V_ADD_NC_U16_fake16_e64 4, %8, 8, %7, 0, 0, implicit $exec
...
name: vop3p
@@ -880,11 +880,11 @@ body: |
# Check op_sel is all 0s when combining
# GCN-LABEL: name: opsel_vop3
-# GCN: %4:vgpr_32 = V_ADD_I16_e64_dpp %2, 0, %0, 0, %1, 0, 0, 1, 15, 15, 1, implicit $exec
-# GCN: %6:vgpr_32 = V_ADD_I16_e64 4, %5, 0, %1, 0, 0, implicit $exec
-# GCN: %8:vgpr_32 = V_ADD_I16_e64 0, %7, 4, %1, 0, 0, implicit $exec
-# GCN: %10:vgpr_32 = V_ADD_I16_e64 4, %9, 4, %1, 0, 0, implicit $exec
-# GCN: %12:vgpr_32 = V_ADD_I16_e64 8, %11, 0, %1, 0, 0, implicit $exec
+# GCN: %4:vgpr_32 = V_ADD_I16_fake16_e64_dpp %2, 0, %0, 0, %1, 0, 0, 1, 15, 15, 1, implicit $exec
+# GCN: %6:vgpr_32 = V_ADD_I16_fake16_e64 4, %5, 0, %1, 0, 0, implicit $exec
+# GCN: %8:vgpr_32 = V_ADD_I16_fake16_e64 0, %7, 4, %1, 0, 0, implicit $exec
+# GCN: %10:vgpr_32 = V_ADD_I16_fake16_e64 4, %9, 4, %1, 0, 0, implicit $exec
+# GCN: %12:vgpr_32 = V_ADD_I16_fake16_e64 8, %11, 0, %1, 0, 0, implicit $exec
name: opsel_vop3
tracksRegLiveness: true
body: |
@@ -897,23 +897,23 @@ body: |
; Combine for op_sel:[0,0,0]
%3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
- %4:vgpr_32 = V_ADD_I16_e64 0, %3, 0, %1, 0, 0, implicit $exec
+ %4:vgpr_32 = V_ADD_I16_fake16_e64 0, %3, 0, %1, 0, 0, implicit $exec
; Do not combine for op_sel:[1,0,0]
%5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
- %6:vgpr_32 = V_ADD_I16_e64 4, %5, 0, %1, 0, 0, implicit $exec
+ %6:vgpr_32 = V_ADD_I16_fake16_e64 4, %5, 0, %1, 0, 0, implicit $exec
; Do not combine for op_sel:[0,1,0]
%7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
- %8:vgpr_32 = V_ADD_I16_e64 0, %7, 4, %1, 0, 0, implicit $exec
+ %8:vgpr_32 = V_ADD_I16_fake16_e64 0, %7, 4, %1, 0, 0, implicit $exec
; Do not combine for op_sel:[1,1,0]
%9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
- %10:vgpr_32 = V_ADD_I16_e64 4, %9, 4, %1, 0, 0, implicit $exec
+ %10:vgpr_32 = V_ADD_I16_fake16_e64 4, %9, 4, %1, 0, 0, implicit $exec
; Do not combine for op_sel:[0,0,1] (dst_op_sel only)
%11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
- %12:vgpr_32 = V_ADD_I16_e64 8, %11, 0, %1, 0, 0, implicit $exec
+ %12:vgpr_32 = V_ADD_I16_fake16_e64 8, %11, 0, %1, 0, 0, implicit $exec
...
# Check op_sel is all 0s and op_sel_hi is all 1s when combining
diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll
index c62b4e565078e4..2e2a1094ba99ae 100644
--- a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll
+++ b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll
@@ -996,7 +996,7 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_i16(i16 inreg
; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; GISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GISEL-GFX11-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY2]], 0, [[COPY1]], 0, 0, implicit $exec
+ ; GISEL-GFX11-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY2]], 0, [[COPY1]], 0, 0, implicit $exec
; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[DEF]]
; GISEL-GFX11-NEXT: FLAT_STORE_SHORT [[COPY3]], [[V_ADD_NC_U16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`)
; GISEL-GFX11-NEXT: S_ENDPGM 0
@@ -1020,7 +1020,7 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_i16(i16 inreg
; DAGISEL-GFX11-WF32-NEXT: {{ $}}
; DAGISEL-GFX11-WF32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; DAGISEL-GFX11-WF32-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; DAGISEL-GFX11-WF32-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
+ ; DAGISEL-GFX11-WF32-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
; DAGISEL-GFX11-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-WF32-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
; DAGISEL-GFX11-WF32-NEXT: FLAT_STORE_SHORT killed [[COPY2]], killed [[V_ADD_NC_U16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`)
@@ -1032,7 +1032,7 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_i16(i16 inreg
; DAGISEL-GFX11-WF64-NEXT: {{ $}}
; DAGISEL-GFX11-WF64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; DAGISEL-GFX11-WF64-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; DAGISEL-GFX11-WF64-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
+ ; DAGISEL-GFX11-WF64-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
; DAGISEL-GFX11-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-WF64-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
; DAGISEL-GFX11-WF64-NEXT: FLAT_STORE_SHORT killed [[COPY2]], killed [[V_ADD_NC_U16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`)
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_opsel.s b/llvm/test/MC/AMDGPU/gfx11_asm_opsel.s
new file mode 100644
index 00000000000000..b7b71e696cd102
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_opsel.s
@@ -0,0 +1,48 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+
+v_add_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
+// GFX11: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_add_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
+// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_add_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
+// GFX11: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX11: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX11: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
+// GFX11: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_sub_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
+// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
+// GFX11: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX11: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX11: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v5.h, null, exec_lo op_sel:[1,1,1]
+// GFX11: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_sub_nc_u16 v5.l, -1, exec_hi op_sel:[0,0,0]
+// GFX11: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0]
+// GFX11: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX11: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX11: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index 67038f4c8eec09..6b6d411982ebc4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -244,50 +244,59 @@ v_add_lshl_u32 v5, src_scc, vcc_lo, -1
v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null
// GFX11: encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
-v_add_nc_i16 v5, v1, v2
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
+v_add_nc_i16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
-v_add_nc_i16 v5, v255, v255
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
+v_add_nc_i16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
-v_add_nc_i16 v5, s1, s2
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+v_add_nc_i16 v5.l, s1, s2
+// GFX11: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
-v_add_nc_i16 v5, s105, s105
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+v_add_nc_i16 v5.l, s105, s105
+// GFX11: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
-v_add_nc_i16 v5, vcc_lo, ttmp15
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+v_add_nc_i16 v5.l, vcc_lo, ttmp15
+// GFX11: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
-v_add_nc_i16 v5, vcc_hi, 0xfe0b
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+v_add_nc_i16 v5.l, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_add_nc_i16 v5, ttmp15, src_scc
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+v_add_nc_i16 v5.l, ttmp15, src_scc
+// GFX11: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
-v_add_nc_i16 v5, m0, 0.5
+v_add_nc_i16 v5.l, m0, 0.5
// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00]
-v_add_nc_i16 v5, exec_lo, -1
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+v_add_nc_i16 v5.l, exec_lo, -1
+// GFX11: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
-v_add_nc_i16 v5, exec_hi, null
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+v_add_nc_i16 v5.l, exec_hi, null
+// GFX11: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
-v_add_nc_i16 v5, null, exec_lo op_sel:[1,1,1]
-// GFX11: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+v_add_nc_i16 v5.l, null, exec_lo
+// GFX11: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
-v_add_nc_i16 v5, -1, exec_hi op_sel:[0,0,0]
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+v_add_nc_i16 v5.l, -1, exec_hi
+// GFX11: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
-v_add_nc_i16 v5, 0.5, m0 op_sel:[1,0,0]
+v_add_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX11: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00]
-v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0]
-// GFX11: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+v_add_nc_i16 v5.l, src_scc, vcc_lo
+// GFX11: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
-v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
-// GFX11: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+v_add_nc_i16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_add_nc_i16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+
+v_add_nc_i16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+
+v_add_nc_i16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_add_nc_i32 v5, v1, v2
// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00]
@@ -334,50 +343,59 @@ v_add_nc_i32 v5, src_scc, vcc_lo
v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp
// GFX11: encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_add_nc_u16 v5, v1, v2
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
+v_add_nc_u16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
-v_add_nc_u16 v5, v255, v255
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
+v_add_nc_u16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
-v_add_nc_u16 v5, s1, s2
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+v_add_nc_u16 v5.l, s1, s2
+// GFX11: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
-v_add_nc_u16 v5, s105, s105
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+v_add_nc_u16 v5.l, s105, s105
+// GFX11: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
-v_add_nc_u16 v5, vcc_lo, ttmp15
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+v_add_nc_u16 v5.l, vcc_lo, ttmp15
+// GFX11: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
-v_add_nc_u16 v5, vcc_hi, 0xfe0b
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+v_add_nc_u16 v5.l, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_add_nc_u16 v5, ttmp15, src_scc
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+v_add_nc_u16 v5.l, ttmp15, src_scc
+// GFX11: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
-v_add_nc_u16 v5, m0, 0.5
+v_add_nc_u16 v5.l, m0, 0.5
// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00]
-v_add_nc_u16 v5, exec_lo, -1
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+v_add_nc_u16 v5.l, exec_lo, -1
+// GFX11: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
-v_add_nc_u16 v5, exec_hi, null
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+v_add_nc_u16 v5.l, exec_hi, null
+// GFX11: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
-v_add_nc_u16 v5, null, exec_lo op_sel:[1,1,1]
-// GFX11: encoding: [0x05,0x58,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+v_add_nc_u16 v5.l, null, exec_lo
+// GFX11: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
-v_add_nc_u16 v5, -1, exec_hi op_sel:[0,0,0]
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+v_add_nc_u16 v5.l, -1, exec_hi
+// GFX11: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
-v_add_nc_u16 v5, 0.5, m0 op_sel:[1,0,0]
+v_add_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX11: encoding: [0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00]
-v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0]
-// GFX11: encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+v_add_nc_u16 v5.l, src_scc, vcc_lo
+// GFX11: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_add_nc_u16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_add_nc_u16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+
+v_add_nc_u16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
-v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
-// GFX11: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+v_add_nc_u16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_alignbit_b32 v5, v1, v2, s3
// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00]
@@ -5801,50 +5819,59 @@ v_sub_co_u32 v5, ttmp[14:15], src_scc, vcc_lo
v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp
// GFX11: encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_sub_nc_i16 v5, v1, v2
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
+v_sub_nc_i16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
-v_sub_nc_i16 v5, v255, v255
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
+v_sub_nc_i16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
-v_sub_nc_i16 v5, s1, s2
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+v_sub_nc_i16 v5.l, s1, s2
+// GFX11: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
-v_sub_nc_i16 v5, s105, s105
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+v_sub_nc_i16 v5.l, s105, s105
+// GFX11: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
-v_sub_nc_i16 v5, vcc_lo, ttmp15
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+v_sub_nc_i16 v5.l, vcc_lo, ttmp15
+// GFX11: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
-v_sub_nc_i16 v5, vcc_hi, 0xfe0b
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_sub_nc_i16 v5, ttmp15, src_scc
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+v_sub_nc_i16 v5.l, ttmp15, src_scc
+// GFX11: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
-v_sub_nc_i16 v5, m0, 0.5
+v_sub_nc_i16 v5.l, m0, 0.5
// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00]
-v_sub_nc_i16 v5, exec_lo, -1
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+v_sub_nc_i16 v5.l, exec_lo, -1
+// GFX11: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
-v_sub_nc_i16 v5, exec_hi, null
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+v_sub_nc_i16 v5.l, exec_hi, null
+// GFX11: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
-v_sub_nc_i16 v5, null, exec_lo op_sel:[1,1,1]
-// GFX11: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+v_sub_nc_i16 v5.l, null, exec_lo
+// GFX11: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
-v_sub_nc_i16 v5, -1, exec_hi op_sel:[0,0,0]
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+v_sub_nc_i16 v5.l, -1, exec_hi
+// GFX11: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
-v_sub_nc_i16 v5, 0.5, m0 op_sel:[1,0,0]
+v_sub_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX11: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00]
-v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0]
-// GFX11: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+v_sub_nc_i16 v5.l, src_scc, vcc_lo
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
-v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
-// GFX11: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+
+v_sub_nc_i16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+
+v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_sub_nc_i32 v5, v1, v2
// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00]
@@ -5891,50 +5918,59 @@ v_sub_nc_i32 v5, src_scc, vcc_lo
v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp
// GFX11: encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_sub_nc_u16 v5, v1, v2
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
+v_sub_nc_u16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
-v_sub_nc_u16 v5, v255, v255
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
+v_sub_nc_u16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
-v_sub_nc_u16 v5, s1, s2
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+v_sub_nc_u16 v5.l, s1, s2
+// GFX11: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
-v_sub_nc_u16 v5, s105, s105
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+v_sub_nc_u16 v5.l, s105, s105
+// GFX11: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
-v_sub_nc_u16 v5, vcc_lo, ttmp15
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+v_sub_nc_u16 v5.l, vcc_lo, ttmp15
+// GFX11: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
-v_sub_nc_u16 v5, vcc_hi, 0xfe0b
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_sub_nc_u16 v5, ttmp15, src_scc
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+v_sub_nc_u16 v5.l, ttmp15, src_scc
+// GFX11: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
-v_sub_nc_u16 v5, m0, 0.5
+v_sub_nc_u16 v5.l, m0, 0.5
// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00]
-v_sub_nc_u16 v5, exec_lo, -1
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+v_sub_nc_u16 v5.l, exec_lo, -1
+// GFX11: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
-v_sub_nc_u16 v5, exec_hi, null
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+v_sub_nc_u16 v5.l, exec_hi, null
+// GFX11: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
-v_sub_nc_u16 v5, null, exec_lo op_sel:[1,1,1]
-// GFX11: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+v_sub_nc_u16 v5.l, null, exec_lo
+// GFX11: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
-v_sub_nc_u16 v5, -1, exec_hi op_sel:[0,0,0]
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+v_sub_nc_u16 v5.l, -1, exec_hi
+// GFX11: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
-v_sub_nc_u16 v5, 0.5, m0 op_sel:[1,0,0]
+v_sub_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX11: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00]
-v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0]
-// GFX11: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+v_sub_nc_u16 v5.l, src_scc, vcc_lo
+// GFX11: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+
+v_sub_nc_u16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
-v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
-// GFX11: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_subrev_co_u32 v5, s6, v1, v2
// W32: encoding: [0x05,0x06,0x02,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index 3c693c556194e1..a25e8a6ccae60a 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -194,47 +194,59 @@ v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo
v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
-v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_add_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -278,47 +290,59 @@ v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -4116,47 +4140,59 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:
v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_sub_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -4200,47 +4236,59 @@ v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_sub_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -4475,30 +4523,6 @@ v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX11: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX11: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX11: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX11: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
// GFX11: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
@@ -4724,30 +4748,6 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 ban
v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
// GFX11: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX11: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX11: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX11: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX11: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
index 79709278bc0c7b..f9803f4400702d 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
@@ -114,14 +114,35 @@ v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0x47,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
-v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: [0x05,0x00,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: [0xff,0x80,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x26,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -132,14 +153,35 @@ v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_add_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x80,0x26,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: [0x05,0x00,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -2601,14 +2643,35 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0xfc,0x01,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: [0x05,0x00,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: [0xff,0x80,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2619,14 +2682,35 @@ v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_sub_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x80,0x25,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: [0x05,0x00,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2748,30 +2832,6 @@ v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX11: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX11: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
@@ -2997,30 +3057,6 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
// GFX11: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX11: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX11: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_opsel.s b/llvm/test/MC/AMDGPU/gfx12_asm_opsel.s
new file mode 100644
index 00000000000000..0c270aa5c8b1e2
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_opsel.s
@@ -0,0 +1,48 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+
+v_add_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
+// GFX12: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_add_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
+// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_add_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
+// GFX12: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX12: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
+// GFX12: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_sub_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
+// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
+// GFX12: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX12: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v5.h, null, exec_lo op_sel:[1,1,1]
+// GFX12: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_sub_nc_u16 v5.l, -1, exec_hi op_sel:[0,0,0]
+// GFX12: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0]
+// GFX12: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX12: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
index f28933ec3a8945..1ae1eaf1ceeadd 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
@@ -208,49 +208,58 @@ v_add_lshl_u32 v5, src_scc, vcc_lo, -1
v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null
// GFX12: encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
-v_add_nc_i16 v5, v1, v2
+v_add_nc_i16 v5.l, v1.l, v2.l
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
-v_add_nc_i16 v5, v255, v255
+v_add_nc_i16 v5.l, v1.h, v2.l
+// GFX12: encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+
+v_add_nc_i16 v5.l, v255.l, v255.l
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
-v_add_nc_i16 v5, s1, s2
+v_add_nc_i16 v5.l, v255.l, v255.h
+// GFX12: encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+
+v_add_nc_i16 v5.l, s1, s2
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
-v_add_nc_i16 v5, s105, s105
+v_add_nc_i16 v5.l, s105, s105
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
-v_add_nc_i16 v5, vcc_lo, ttmp15
+v_add_nc_i16 v5.l, vcc_lo, ttmp15
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
-v_add_nc_i16 v5, vcc_hi, 0xfe0b
+v_add_nc_i16 v5.l, vcc_hi, 0xfe0b
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_add_nc_i16 v5, ttmp15, src_scc
+v_add_nc_i16 v5.l, ttmp15, src_scc
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
-v_add_nc_i16 v5, m0, 0.5
+v_add_nc_i16 v5.l, m0, 0.5
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00]
-v_add_nc_i16 v5, exec_lo, -1
+v_add_nc_i16 v5.l, exec_lo, -1
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
-v_add_nc_i16 v5, exec_hi, null
+v_add_nc_i16 v5.l, exec_hi, null
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
-v_add_nc_i16 v5, null, exec_lo op_sel:[1,1,1]
+v_add_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
// GFX12: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
-v_add_nc_i16 v5, -1, exec_hi op_sel:[0,0,0]
+v_add_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
-v_add_nc_i16 v5, 0.5, m0 op_sel:[1,0,0]
+v_add_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX12: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00]
-v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0]
+v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
// GFX12: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
-v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_add_nc_i16 v255.h, 0xfe0b, vcc_hi clamp
// GFX12: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_add_nc_i32 v5, v1, v2
@@ -298,49 +307,58 @@ v_add_nc_i32 v5, src_scc, vcc_lo
v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp
// GFX12: encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_add_nc_u16 v5, v1, v2
+v_add_nc_u16 v5.l, v1.l, v2.l
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
-v_add_nc_u16 v5, v255, v255
+v_add_nc_u16 v5.l, v1.h, v2.l
+// GFX12: encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+
+v_add_nc_u16 v5.l, v255.l, v255.l
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
-v_add_nc_u16 v5, s1, s2
+v_add_nc_u16 v5.l, v255.l, v255.h
+// GFX12: encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+
+v_add_nc_u16 v5.l, s1, s2
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
-v_add_nc_u16 v5, s105, s105
+v_add_nc_u16 v5.l, s105, s105
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
-v_add_nc_u16 v5, vcc_lo, ttmp15
+v_add_nc_u16 v5.l, vcc_lo, ttmp15
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
-v_add_nc_u16 v5, vcc_hi, 0xfe0b
+v_add_nc_u16 v5.l, vcc_hi, 0xfe0b
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_add_nc_u16 v5, ttmp15, src_scc
+v_add_nc_u16 v5.l, ttmp15, src_scc
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
-v_add_nc_u16 v5, m0, 0.5
+v_add_nc_u16 v5.l, m0, 0.5
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00]
-v_add_nc_u16 v5, exec_lo, -1
+v_add_nc_u16 v5.l, exec_lo, -1
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
-v_add_nc_u16 v5, exec_hi, null
+v_add_nc_u16 v5.l, exec_hi, null
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
-v_add_nc_u16 v5, null, exec_lo op_sel:[1,1,1]
+v_add_nc_u16 v5.h, null, exec_lo op_sel:[1,1,1]
// GFX12: encoding: [0x05,0x58,0x03,0xd7,0x7c,0xfc,0x00,0x00]
-v_add_nc_u16 v5, -1, exec_hi op_sel:[0,0,0]
+v_add_nc_u16 v5.l, -1, exec_hi op_sel:[0,0,0]
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
-v_add_nc_u16 v5, 0.5, m0 op_sel:[1,0,0]
+v_add_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX12: encoding: [0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00]
-v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0]
+v_add_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
// GFX12: encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
-v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_add_nc_u16 v255.h, 0xfe0b, vcc_hi clamp
// GFX12: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_alignbit_b32 v5, v1, v2, s3
@@ -5696,49 +5714,58 @@ v_sub_co_u32 v5, ttmp[14:15], src_scc, vcc_lo
v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp
// GFX12: encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_sub_nc_i16 v5, v1, v2
+v_sub_nc_i16 v5.l, v1.l, v2.l
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
-v_sub_nc_i16 v5, v255, v255
+v_sub_nc_i16 v5.l, v1.h, v2.l
+// GFX12: encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+
+v_sub_nc_i16 v5.l, v255.l, v255.l
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
-v_sub_nc_i16 v5, s1, s2
+v_sub_nc_i16 v5.l, v255.l, v255.h
+// GFX12: encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+
+v_sub_nc_i16 v5.l, s1, s2
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
-v_sub_nc_i16 v5, s105, s105
+v_sub_nc_i16 v5.l, s105, s105
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
-v_sub_nc_i16 v5, vcc_lo, ttmp15
+v_sub_nc_i16 v5.l, vcc_lo, ttmp15
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
-v_sub_nc_i16 v5, vcc_hi, 0xfe0b
+v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_sub_nc_i16 v5, ttmp15, src_scc
+v_sub_nc_i16 v5.l, ttmp15, src_scc
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
-v_sub_nc_i16 v5, m0, 0.5
+v_sub_nc_i16 v5.l, m0, 0.5
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00]
-v_sub_nc_i16 v5, exec_lo, -1
+v_sub_nc_i16 v5.l, exec_lo, -1
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
-v_sub_nc_i16 v5, exec_hi, null
+v_sub_nc_i16 v5.l, exec_hi, null
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
-v_sub_nc_i16 v5, null, exec_lo op_sel:[1,1,1]
+v_sub_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
// GFX12: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
-v_sub_nc_i16 v5, -1, exec_hi op_sel:[0,0,0]
+v_sub_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
-v_sub_nc_i16 v5, 0.5, m0 op_sel:[1,0,0]
+v_sub_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX12: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00]
-v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0]
+v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
// GFX12: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
-v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi clamp
// GFX12: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_sub_nc_i32 v5, v1, v2
@@ -5786,49 +5813,58 @@ v_sub_nc_i32 v5, src_scc, vcc_lo
v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp
// GFX12: encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_sub_nc_u16 v5, v1, v2
+v_sub_nc_u16 v5.l, v1.l, v2.l
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
-v_sub_nc_u16 v5, v255, v255
+v_sub_nc_u16 v5.l, v1.h, v2.l
+// GFX12: encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+
+v_sub_nc_u16 v5.l, v255.l, v255.l
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
-v_sub_nc_u16 v5, s1, s2
+v_sub_nc_u16 v5.l, v255.l, v255.h
+// GFX12: encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+
+v_sub_nc_u16 v5.l, s1, s2
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
-v_sub_nc_u16 v5, s105, s105
+v_sub_nc_u16 v5.l, s105, s105
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
-v_sub_nc_u16 v5, vcc_lo, ttmp15
+v_sub_nc_u16 v5.l, vcc_lo, ttmp15
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
-v_sub_nc_u16 v5, vcc_hi, 0xfe0b
+v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_sub_nc_u16 v5, ttmp15, src_scc
+v_sub_nc_u16 v5.l, ttmp15, src_scc
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
-v_sub_nc_u16 v5, m0, 0.5
+v_sub_nc_u16 v5.l, m0, 0.5
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00]
-v_sub_nc_u16 v5, exec_lo, -1
+v_sub_nc_u16 v5.l, exec_lo, -1
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
-v_sub_nc_u16 v5, exec_hi, null
+v_sub_nc_u16 v5.l, exec_hi, null
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
-v_sub_nc_u16 v5, null, exec_lo op_sel:[1,1,1]
+v_sub_nc_u16 v5.h, null, exec_lo op_sel:[1,1,1]
// GFX12: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00]
-v_sub_nc_u16 v5, -1, exec_hi op_sel:[0,0,0]
+v_sub_nc_u16 v5.l, -1, exec_hi op_sel:[0,0,0]
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
-v_sub_nc_u16 v5, 0.5, m0 op_sel:[1,0,0]
+v_sub_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX12: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00]
-v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0]
+v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
// GFX12: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
-v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi clamp
// GFX12: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_subrev_co_u32 v5, s6, v1, v2
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
index adf37901fc85fd..56bd0ee4b47465 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
@@ -214,47 +214,71 @@ v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo
v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
-v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_add_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -298,47 +322,71 @@ v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -4622,47 +4670,71 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:
v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_sub_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -4706,47 +4778,71 @@ v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_sub_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -5001,30 +5097,6 @@ v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX12: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX12: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX12: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX12: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX12: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX12: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX12: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX12: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
// GFX12: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
@@ -5250,30 +5322,6 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 ban
v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
// GFX12: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX12: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX12: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX12: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX12: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX12: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX12: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX12: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX12: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
// GFX12: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
index 1be122faccbc92..6331d22c6976d9 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
@@ -134,14 +134,38 @@ v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x00,0x47,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
-v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX12: [0x05,0x00,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: [0xff,0x80,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x10,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0xc0,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x26,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -152,14 +176,38 @@ v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_add_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x80,0x26,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX12: [0x05,0x00,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x10,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0xc0,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -3043,14 +3091,38 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0xfc,0x01,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX12: [0x05,0x00,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: [0xff,0x80,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x10,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0xc0,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -3061,14 +3133,38 @@ v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_sub_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x80,0x25,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX12: [0x05,0x00,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x10,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0xc0,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -3210,30 +3306,6 @@ v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x00,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX12: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX12: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
@@ -3459,30 +3531,6 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
// GFX12: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX12: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX12: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
// GFX12: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
index 07058a64515920..365caa5f9b6d09 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
@@ -189,49 +189,112 @@
# GFX11: v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf
-# GFX11: v_add_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
-0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00
-
-# GFX11: v_add_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
-0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00
-
-# GFX11: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_add_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00
-# GFX11: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00
-# GFX11: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00
-# GFX11: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00
-# GFX11: v_add_nc_i16 v5, m0, 0x3800
+# W32-REAL16: v_add_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00
-# GFX11: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00
-# GFX11: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00
-# GFX11: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00
-# GFX11: v_add_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
-0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00
-
-# GFX11: v_add_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0]
-0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00
-
-# GFX11: v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
-0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00
+# W32-REAL16: v_add_nc_i16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00
+
+# W32-REAL16: v_add_nc_i16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+0x05,0x00,0x0d,0xd7,0xf0,0xfa,0x00,0x00
+
+# W32-REAL16: v_add_nc_i16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00
+
+# W32-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX11: v_add_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00]
@@ -279,49 +342,112 @@
# GFX11: v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX11: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
-0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00
-
-# GFX11: v_add_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
-0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00
-
-# GFX11: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_add_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00
-# GFX11: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00
-# GFX11: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00
-# GFX11: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00
-# GFX11: v_add_nc_u16 v5, m0, 0x3800
+# W32-REAL16: v_add_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00
-# GFX11: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00
-# GFX11: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00
-# GFX11: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00
-# GFX11: v_add_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00]
-0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00
-
-# GFX11: v_add_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0]
-0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00
-
-# GFX11: v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
-0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00
+# W32-REAL16: v_add_nc_u16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+0x05,0x00,0x03,0xd7,0xf0,0xfa,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX11: v_alignbit_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00]
@@ -5871,49 +5997,112 @@
# GFX11: v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX11: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
-0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00
-
-# GFX11: v_sub_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
-0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00
-
-# GFX11: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_sub_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00
-# GFX11: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00
-# GFX11: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00
-# GFX11: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00
-# GFX11: v_sub_nc_i16 v5, m0, 0x3800
+# W32-REAL16: v_sub_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00
-# GFX11: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00
-# GFX11: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00
-# GFX11: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00
-# GFX11: v_sub_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
-0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00
-
-# GFX11: v_sub_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0]
-0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00
-
-# GFX11: v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
-0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00
+# W32-REAL16: v_sub_nc_i16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00
+
+# W32-REAL16: v_sub_nc_i16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+0x05,0x00,0x0e,0xd7,0xf0,0xfa,0x00,0x00
+
+# W32-REAL16: v_sub_nc_i16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00
+
+# W32-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX11: v_sub_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00]
@@ -5961,49 +6150,112 @@
# GFX11: v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX11: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
-0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00
-
-# GFX11: v_sub_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
-0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00
-
-# GFX11: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_sub_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00
-# GFX11: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00
-# GFX11: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00
-# GFX11: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00
-# GFX11: v_sub_nc_u16 v5, m0, 0x3800
+# W32-REAL16: v_sub_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00
-# GFX11: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00
-# GFX11: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00
-# GFX11: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00
-# GFX11: v_sub_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00]
-0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00
-
-# GFX11: v_sub_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0]
-0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00
-
-# GFX11: v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
-0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00
+# W32-REAL16: v_sub_nc_u16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+0x05,0x00,0x04,0xd7,0xf0,0xfa,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# W32: v_subrev_co_u32 v5, s12, v1, v2 ; encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
index 4ae8b053f0e0f3..d0bd6398ad10a0 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
@@ -3824,88 +3824,220 @@
# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -4664,88 +4796,220 @@
# GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
index b44dba748666c7..cbf5a3d11e50b8 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
@@ -2168,34 +2168,112 @@
# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2840,34 +2918,112 @@
# GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
index af04a31423b6fe..d9add27704d9d2 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
@@ -153,49 +153,115 @@
# GFX12: v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf
-# GFX12: v_add_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00
-# GFX12: v_add_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00
-# GFX12: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00
-# GFX12: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00
-# GFX12: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00
-# GFX12: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX12: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00
-# GFX12: v_add_nc_i16 v5, m0, 0x3800
+# W32-REAL16: v_add_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00
-# GFX12: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00
-# GFX12: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00
-# GFX12: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00
-# GFX12: v_add_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00
-# GFX12: v_add_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0]
+# W32-REAL16: v_add_nc_i16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00
-# GFX12: v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00
-# GFX12: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
+
+# GFX11: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00
+
+# GFX11: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00
+
+# GFX11: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX12: v_add_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00]
@@ -243,49 +309,115 @@
# GFX12: v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX12: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00
-# GFX12: v_add_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00
-# GFX12: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00
-# GFX12: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00
-# GFX12: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00
-# GFX12: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX12: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00
-# GFX12: v_add_nc_u16 v5, m0, 0x3800
+# W32-REAL16: v_add_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00
-# GFX12: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00
-# GFX12: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00
-# GFX12: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00
-# GFX12: v_add_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00]
0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00
-# GFX12: v_add_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0]
+# W32-REAL16: v_add_nc_u16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00
-# GFX12: v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00
-# GFX12: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
+
+# GFX11: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00
+
+# GFX11: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00
+
+# GFX11: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX12: v_alignbit_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00]
@@ -5797,49 +5929,115 @@
# GFX12: v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX12: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00
-# GFX12: v_sub_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00
-# GFX12: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00
-# GFX12: v_sub_nc_i16 v5, m0, 0x3800
+# W32-REAL16: v_sub_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00
-# GFX12: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00
-# GFX12: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0]
+# W32-REAL16: v_sub_nc_i16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00
-# GFX12: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
+
+# GFX11: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00
+
+# GFX11: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00
+
+# GFX11: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX12: v_sub_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00]
@@ -5887,49 +6085,115 @@
# GFX12: v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX12: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00
-# GFX12: v_sub_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00
-# GFX12: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00
-# GFX12: v_sub_nc_u16 v5, m0, 0x3800
+# W32-REAL16: v_sub_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00
-# GFX12: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00
-# GFX12: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00]
0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0]
+# W32-REAL16: v_sub_nc_u16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00
-# GFX12: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
+
+# GFX11: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00
+
+# GFX11: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00
+
+# GFX11: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# W32: v_subrev_co_u32 v5, s12, v1, v2 ; encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
index 65cfdd5ef7de03..5815a6be79954d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
@@ -4115,88 +4115,276 @@
# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX12: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# GFX11: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# GFX11: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# GFX11: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# GFX11: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# GFX11: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# GFX11: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# GFX11: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX12: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# GFX11: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -5000,88 +5188,276 @@
# GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX12: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# GFX11: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# GFX11: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# GFX11: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# GFX11: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# GFX11: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# GFX11: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# GFX11: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX12: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# GFX11: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
index 4640b967cbc07b..8636891fca9483 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
@@ -2393,34 +2393,168 @@
# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# GFX11: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# GFX11: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# GFX11: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# GFX11: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# GFX11: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -3113,34 +3247,168 @@
# GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# GFX11: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# GFX11: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# GFX11: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# GFX11: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# GFX11: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
>From a7d7a4230f95920695d111cc3db56a3c13e02e4d Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Wed, 9 Oct 2024 13:19:41 -0400
Subject: [PATCH 2/2] [AMDGPU][True16][MC] VOPC profile and pesudo update for
true16/fake16
---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 85 ++-
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 62 +-
llvm/lib/Target/AMDGPU/SIInstructions.td | 2 +-
llvm/lib/Target/AMDGPU/VOPCInstructions.td | 552 +++++++++++-------
llvm/lib/Target/AMDGPU/VOPInstructions.td | 11 +-
.../GlobalISel/inst-select-amdgcn.class.mir | 30 +-
.../inst-select-amdgcn.class.s16.mir | 15 +-
.../inst-select-amdgcn.fcmp.constants.w32.mir | 8 +-
.../inst-select-amdgcn.fcmp.constants.w64.mir | 8 +-
.../AMDGPU/GlobalISel/inst-select-brcond.mir | 2 +-
.../GlobalISel/inst-select-icmp.s16.mir | 48 +-
.../AMDGPU/fix-sgpr-copies-f16-fake16.mir | 20 +
.../AMDGPU/fix-sgpr-copies-f16-true16.mir | 23 +-
.../CodeGen/AMDGPU/fix-sgpr-copies-f16.mir | 20 -
llvm/test/CodeGen/AMDGPU/shrink-true16.mir | 8 +-
llvm/test/CodeGen/AMDGPU/vopc_dpp.mir | 24 +-
16 files changed, 592 insertions(+), 326 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index febf0711c7d574..104cb6caf1867d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1104,10 +1104,13 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
if (Size == 16 && !ST.has16BitInsts())
return -1;
- const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
+ const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
+ unsigned FakeS16Opc, unsigned S32Opc,
unsigned S64Opc) {
if (Size == 16)
- return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
+ return ST.hasTrue16BitInsts()
+ ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
+ : S16Opc;
if (Size == 32)
return S32Opc;
return S64Opc;
@@ -1118,83 +1121,109 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
llvm_unreachable("Unknown condition code!");
case CmpInst::ICMP_NE:
return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
- AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
+ AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
+ AMDGPU::V_CMP_NE_U64_e64);
case CmpInst::ICMP_EQ:
return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
- AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
+ AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
+ AMDGPU::V_CMP_EQ_U64_e64);
case CmpInst::ICMP_SGT:
return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
- AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
+ AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
+ AMDGPU::V_CMP_GT_I64_e64);
case CmpInst::ICMP_SGE:
return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
- AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
+ AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
+ AMDGPU::V_CMP_GE_I64_e64);
case CmpInst::ICMP_SLT:
return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
- AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
+ AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
+ AMDGPU::V_CMP_LT_I64_e64);
case CmpInst::ICMP_SLE:
return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
- AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
+ AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
+ AMDGPU::V_CMP_LE_I64_e64);
case CmpInst::ICMP_UGT:
return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
- AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
+ AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
+ AMDGPU::V_CMP_GT_U64_e64);
case CmpInst::ICMP_UGE:
return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
- AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
+ AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
+ AMDGPU::V_CMP_GE_U64_e64);
case CmpInst::ICMP_ULT:
return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
- AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
+ AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
+ AMDGPU::V_CMP_LT_U64_e64);
case CmpInst::ICMP_ULE:
return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
- AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
+ AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
+ AMDGPU::V_CMP_LE_U64_e64);
case CmpInst::FCMP_OEQ:
return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
- AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
+ AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
+ AMDGPU::V_CMP_EQ_F64_e64);
case CmpInst::FCMP_OGT:
return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
- AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
+ AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
+ AMDGPU::V_CMP_GT_F64_e64);
case CmpInst::FCMP_OGE:
return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
- AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
+ AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
+ AMDGPU::V_CMP_GE_F64_e64);
case CmpInst::FCMP_OLT:
return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
- AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
+ AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
+ AMDGPU::V_CMP_LT_F64_e64);
case CmpInst::FCMP_OLE:
return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
- AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
+ AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
+ AMDGPU::V_CMP_LE_F64_e64);
case CmpInst::FCMP_ONE:
return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
- AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
+ AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
+ AMDGPU::V_CMP_NEQ_F64_e64);
case CmpInst::FCMP_ORD:
return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
- AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
+ AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
+ AMDGPU::V_CMP_O_F64_e64);
case CmpInst::FCMP_UNO:
return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
- AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
+ AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
+ AMDGPU::V_CMP_U_F64_e64);
case CmpInst::FCMP_UEQ:
return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
- AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
+ AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
+ AMDGPU::V_CMP_NLG_F64_e64);
case CmpInst::FCMP_UGT:
return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
- AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
+ AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
+ AMDGPU::V_CMP_NLE_F64_e64);
case CmpInst::FCMP_UGE:
return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
- AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
+ AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
+ AMDGPU::V_CMP_NLT_F64_e64);
case CmpInst::FCMP_ULT:
return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
- AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
+ AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
+ AMDGPU::V_CMP_NGE_F64_e64);
case CmpInst::FCMP_ULE:
return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
- AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
+ AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
+ AMDGPU::V_CMP_NGT_F64_e64);
case CmpInst::FCMP_UNE:
return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
- AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
+ AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
+ AMDGPU::V_CMP_NEQ_F64_e64);
case CmpInst::FCMP_TRUE:
return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
- AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
+ AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
+ AMDGPU::V_CMP_TRU_F64_e64);
case CmpInst::FCMP_FALSE:
return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
- AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
+ AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
+ AMDGPU::V_CMP_F_F64_e64);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5c39b2a4fc96aa..bb6743e1fe42ee 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5497,20 +5497,48 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
- case AMDGPU::S_CMP_LT_F16: return AMDGPU::V_CMP_LT_F16_t16_e64;
- case AMDGPU::S_CMP_EQ_F16: return AMDGPU::V_CMP_EQ_F16_t16_e64;
- case AMDGPU::S_CMP_LE_F16: return AMDGPU::V_CMP_LE_F16_t16_e64;
- case AMDGPU::S_CMP_GT_F16: return AMDGPU::V_CMP_GT_F16_t16_e64;
- case AMDGPU::S_CMP_LG_F16: return AMDGPU::V_CMP_LG_F16_t16_e64;
- case AMDGPU::S_CMP_GE_F16: return AMDGPU::V_CMP_GE_F16_t16_e64;
- case AMDGPU::S_CMP_O_F16: return AMDGPU::V_CMP_O_F16_t16_e64;
- case AMDGPU::S_CMP_U_F16: return AMDGPU::V_CMP_U_F16_t16_e64;
- case AMDGPU::S_CMP_NGE_F16: return AMDGPU::V_CMP_NGE_F16_t16_e64;
- case AMDGPU::S_CMP_NLG_F16: return AMDGPU::V_CMP_NLG_F16_t16_e64;
- case AMDGPU::S_CMP_NGT_F16: return AMDGPU::V_CMP_NGT_F16_t16_e64;
- case AMDGPU::S_CMP_NLE_F16: return AMDGPU::V_CMP_NLE_F16_t16_e64;
- case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64;
- case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64;
+ case AMDGPU::S_CMP_LT_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
+ : AMDGPU::V_CMP_LT_F16_fake16_e64;
+ case AMDGPU::S_CMP_EQ_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
+ : AMDGPU::V_CMP_EQ_F16_fake16_e64;
+ case AMDGPU::S_CMP_LE_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
+ : AMDGPU::V_CMP_LE_F16_fake16_e64;
+ case AMDGPU::S_CMP_GT_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
+ : AMDGPU::V_CMP_GT_F16_fake16_e64;
+ case AMDGPU::S_CMP_LG_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
+ : AMDGPU::V_CMP_LG_F16_fake16_e64;
+ case AMDGPU::S_CMP_GE_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
+ : AMDGPU::V_CMP_GE_F16_fake16_e64;
+ case AMDGPU::S_CMP_O_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
+ : AMDGPU::V_CMP_O_F16_fake16_e64;
+ case AMDGPU::S_CMP_U_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
+ : AMDGPU::V_CMP_U_F16_fake16_e64;
+ case AMDGPU::S_CMP_NGE_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
+ : AMDGPU::V_CMP_NGE_F16_fake16_e64;
+ case AMDGPU::S_CMP_NLG_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
+ : AMDGPU::V_CMP_NLG_F16_fake16_e64;
+ case AMDGPU::S_CMP_NGT_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
+ : AMDGPU::V_CMP_NGT_F16_fake16_e64;
+ case AMDGPU::S_CMP_NLE_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
+ : AMDGPU::V_CMP_NLE_F16_fake16_e64;
+ case AMDGPU::S_CMP_NEQ_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
+ : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
+ case AMDGPU::S_CMP_NLT_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
+ : AMDGPU::V_CMP_NLT_F16_fake16_e64;
case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
@@ -7337,14 +7365,16 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
auto NewInstr =
BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg)
.setMIFlags(Inst.getFlags());
- if (AMDGPU::getNamedOperandIdx(NewOpcode,
- AMDGPU::OpName::src0_modifiers) >= 0) {
+ if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0_modifiers)) {
NewInstr
.addImm(0) // src0_modifiers
.add(Inst.getOperand(0)) // src0
.addImm(0) // src1_modifiers
.add(Inst.getOperand(1)) // src1
.addImm(0); // clamp
+
+ if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::op_sel))
+ NewInstr.addImm(0); // op_sel0
} else {
NewInstr
.add(Inst.getOperand(0))
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 8073aca7f197fb..a47a04e3e0fd42 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3427,7 +3427,7 @@ def : GCNPat <
SRCMODS.NONE,
(V_MOV_B64_PSEUDO (i64 0x3fefffffffffffff))),
$x,
- (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, (i32 3 /*NaN*/))))
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, SRCMODS.NONE, (i32 3 /*NaN*/))))
>;
} // End SubtargetPredicates = isGFX6
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index d6e08dce130ced..0d140863a37733 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -89,23 +89,43 @@ multiclass VOPC_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType
def _t16 : VOPC_Profile<sched, vt0, vt1> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;
- let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let HasOpSel = 1;
+ let HasModifiers = 1; // All instructions at least have OpSel
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+
+ let DstRC64 = getVALUDstForVT<DstVT, 1, 1>.ret;
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
+ let Src0Mod = getSrcMod<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/>.ret;
}
def _fake16: VOPC_Profile<sched, vt0, vt1> {
let IsTrue16 = 1;
+ let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
+ let DstRC64 = getVALUDstForVT<DstVT>.ret;
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0VOP3DPP = VGPRSrc_32;
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
}
}
@@ -119,8 +139,7 @@ class VOPC_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt0,
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
src0_sel:$src0_sel, src1_sel:$src1_sel);
- let AsmVOP3Base = !if(Src0VT.isFP, "$src0_modifiers, $src1_modifiers$clamp",
- "$src0, $src1");
+ let HasDst = 0;
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
let EmitDst = 0;
}
@@ -130,23 +149,39 @@ multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, Va
def _t16 : VOPC_NoSdst_Profile<sched, vt0, vt1> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;
- let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let HasOpSel = 1;
+ let HasModifiers = 1; // All instructions at least have OpSel
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
+ let Src0Mod = getSrcMod<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/>.ret;
}
def _fake16 : VOPC_NoSdst_Profile<sched, vt0, vt1> {
let IsTrue16 = 1;
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0VOP3DPP = VGPRSrc_32;
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
}
}
@@ -192,6 +227,8 @@ class VOPC_Real <VOPC_Pseudo ps, int EncodingFamily, string asm_name = ps.Pseudo
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
+ let True16Predicate = ps.True16Predicate;
+ let OtherPredicates = ps.OtherPredicates;
let AsmMatchConverter = ps.AsmMatchConverter;
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
@@ -281,7 +318,9 @@ class getVOPCPat64 <SDPatternOperator cond, VOPProfile P> : LetDummies {
(setcc (P.Src0VT
!if(P.HasOMod,
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
- (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
+ !if(P.HasClamp,
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers)))),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
cond))],
[(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]);
@@ -314,7 +353,7 @@ multiclass VOPC_Pseudos <string opName,
let isCommutable = 1;
}
- def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret>,
+ def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret, 0/*IsVOP3P*/, P.HasOpSel>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>,
VCMPXNoSDstTable<1, opName#"_e64">,
VCMPVCMPXTable<opName#"_e64"> {
@@ -322,6 +361,10 @@ multiclass VOPC_Pseudos <string opName,
let SchedRW = P.Schedule;
let isCompare = 1;
let isCommutable = 1;
+ let AsmMatchConverter =
+ !if (P.HasOpSel, "cvtVOP3OpSel",
+ !if (!or(P.HasModifiers, P.HasOMod, P.HasIntClamp), "cvtVOP3",
+ ""));
}
if P.HasExtSDWA then
@@ -373,7 +416,7 @@ multiclass VOPCX_Pseudos <string opName,
let IsVCMPX = 1;
}
- def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst>,
+ def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst, [], 0/*IsVOP3P*/, P_NoSDst.HasOpSel>,
Commutable_REV<revOp#"_nosdst_e64", !eq(revOp, opName)>,
VCMPXNoSDstTable<0, opName#"_e64">,
VCMPVCMPXTable<!subst("v_cmpx", "v_cmp", opName#"_e64")> {
@@ -383,6 +426,11 @@ multiclass VOPCX_Pseudos <string opName,
let isCommutable = 1;
let SubtargetPredicate = HasNoSdstCMPX;
let IsVCMPX = 1;
+ let AsmMatchConverter =
+ !if (P_NoSDst.HasOpSel, "cvtVOP3OpSel",
+ !if (!or(P_NoSDst.HasModifiers, P_NoSDst.HasOMod,
+ P_NoSDst.HasIntClamp),
+ "cvtVOP3", ""));
}
if P_NoSDst.HasExtSDWA then
@@ -464,9 +512,10 @@ multiclass VOPC_I16 <string opName, SDPatternOperator cond = COND_NULL,
multiclass VOPC_I32 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
VOPC_Pseudos <opName, VOPC_I1_I32_I32, cond, revOp, 0>;
-multiclass VOPC_I64 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
- VOPC_Pseudos <opName, VOPC_I1_I64_I64, cond, revOp, 0>;
-
+let IsInvalidSingleUseConsumer = 1 in {
+ multiclass VOPC_I64 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_I64_I64, cond, revOp, 0>;
+}
multiclass VOPCX_F16<string opName, string revOp = opName> {
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
@@ -501,8 +550,10 @@ multiclass VOPCX_I16<string opName, string revOp = opName> {
multiclass VOPCX_I32 <string opName, string revOp = opName> :
VOPCX_Pseudos <opName, VOPC_I1_I32_I32, VOPC_I32_I32, COND_NULL, revOp>;
-multiclass VOPCX_I64 <string opName, string revOp = opName> :
- VOPCX_Pseudos <opName, VOPC_I1_I64_I64, VOPC_I64_I64, COND_NULL, revOp>;
+let IsInvalidSingleUseConsumer = 1 in {
+ multiclass VOPCX_I64 <string opName, string revOp = opName> :
+ VOPCX_Pseudos <opName, VOPC_I1_I64_I64, VOPC_I64_I64, COND_NULL, revOp>;
+}
//===----------------------------------------------------------------------===//
@@ -801,52 +852,57 @@ defm V_CMPX_T_U64 : VOPCX_I64 <"v_cmpx_t_u64">;
class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType src1VT = i32> :
VOPC_Profile<sched, src0VT, src1VT> {
- let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
- let AsmDPP16 = AsmDPP#"$fi";
- let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
- let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
- // DPP8 forbids modifiers and can inherit from VOPC_Profile
-
- let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VCSrc_b32:$src1);
- let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel),
- (ins)));
- let AsmVOP3Base = "$sdst, $src0_modifiers, $src1";
-
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
Clamp:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel";
- let HasSrc1Mods = 0;
let HasClamp = 0;
let HasOMod = 0;
}
multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
def NAME : VOPC_Class_Profile<sched, f16>;
- def _t16 : VOPC_Class_Profile<sched, f16, i16> {
+ def _t16 : VOPC_Class_Profile<sched, f16, f16> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;
- let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src1RC64 = VSrc_b32;
- let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let HasOpSel = 1;
+ let HasModifiers = 1; // All instructions at least have OpSel
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+
+ let DstRC64 = getVALUDstForVT<DstVT, 1, 1>.ret;
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
+ let Src0Mod = getSrcMod<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/>.ret;
}
- def _fake16 : VOPC_Class_Profile<sched, f16, i16> {
+ def _fake16 : VOPC_Class_Profile<sched, f16, f16> {
let IsTrue16 = 1;
+ let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
+ let DstRC64 = getVALUDstForVT<DstVT>.ret;
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC64 = VSrc_b32;
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0VOP3DPP = VGPRSrc_32;
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
}
}
@@ -857,49 +913,85 @@ class VOPC_Class_NoSdst_Profile<list<SchedReadWrite> sched, ValueType src0VT, Va
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
src0_sel:$src0_sel, src1_sel:$src1_sel);
- let AsmVOP3Base = "$src0_modifiers, $src1";
+ let HasDst = 0;
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
let EmitDst = 0;
}
multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
def NAME : VOPC_Class_NoSdst_Profile<sched, f16>;
- def _t16 : VOPC_Class_NoSdst_Profile<sched, f16, i16> {
+ def _t16 : VOPC_Class_NoSdst_Profile<sched, f16, f16> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;
- let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src1RC64 = VSrc_b32;
- let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let HasOpSel = 1;
+ let HasModifiers = 1; // All instructions at least have OpSel
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+
+ let DstRC64 = getVALUDstForVT<DstVT, 1, 1>.ret;
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
+ let Src0Mod = getSrcMod<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/>.ret;
}
- def _fake16 : VOPC_Class_NoSdst_Profile<sched, f16, i16> {
+ def _fake16 : VOPC_Class_NoSdst_Profile<sched, f16, f16> {
let IsTrue16 = 1;
+ let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
+ let DstRC64 = getVALUDstForVT<DstVT>.ret;
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC64 = VSrc_b32;
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0VOP3DPP = VGPRSrc_32;
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
}
}
-class getVOPCClassPat64 <VOPProfile P> {
- list<dag> ret =
- [(set i1:$sdst,
+multiclass VOPCClassPat64<string inst_name> {
+ defvar inst = !cast<VOP_Pseudo>(inst_name#"_e64");
+ defvar P = inst.Pfl;
+ def : GCNPat <
+ (i1:$sdst
(AMDGPUfp_class
(P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
- i32:$src1))];
+ P.Src1VT:$src1)),
+ (inst i32:$src0_modifiers, P.Src0VT:$src0,
+ 0 /*src1_modifiers*/, P.Src1VT:$src1)
+ >;
+}
+multiclass VOPCClassPat64_fake16<string inst_name> {
+ defvar inst = !cast<VOP_Pseudo>(inst_name#"_fake16_e64");
+ defvar P = inst.Pfl;
+ def : GCNPat <
+ (i1:$sdst
+ (AMDGPUfp_class
+ (P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
+ i32:$src1)),
+ (inst i32:$src0_modifiers, P.Src0VT:$src0,
+ 0 /*src1_modifiers*/, VGPR_32:$src1)
+ >;
}
-
-// Special case for class instructions which only have modifiers on
-// the 1st source operand.
+// cmp_class ignores the FP mode and faithfully reports the unmodified
+// source value.
+let ReadsModeReg = 0, mayRaiseFPException = 0 in {
multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
bit DefVcc = 1> {
def _e32 : VOPC_Pseudo <opName, p>,
@@ -910,10 +1002,14 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
let isConvergent = DefExec;
}
- def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret>,
+ def _e64 : VOP3_Pseudo<opName, p, [], 0/*IsVOP3P*/, p.HasOpSel>,
VCMPXNoSDstTable<1, opName#"_e64"> {
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = p.Schedule;
+ let AsmMatchConverter =
+ !if (p.HasOpSel, "cvtVOP3OpSel",
+ !if (!or(p.HasModifiers, p.HasOMod, p.HasIntClamp), "cvtVOP3",
+ ""));
}
if p.HasExtSDWA then
@@ -957,11 +1053,16 @@ multiclass VOPCX_Class_Pseudos <string opName,
let SubtargetPredicate = HasNoSdstCMPX;
}
- def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst>,
+ def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst, [], 0/*IsVOP3P*/, P_NoSDst.HasOpSel>,
VCMPXNoSDstTable<0, opName#"_e64"> {
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
let SubtargetPredicate = HasNoSdstCMPX;
+ let AsmMatchConverter =
+ !if (P_NoSDst.HasOpSel, "cvtVOP3OpSel",
+ !if (!or(P_NoSDst.HasModifiers, P_NoSDst.HasOMod,
+ P_NoSDst.HasIntClamp),
+ "cvtVOP3", ""));
}
if P_NoSDst.HasExtSDWA then
@@ -990,6 +1091,7 @@ multiclass VOPCX_Class_Pseudos <string opName,
} // end SubtargetPredicate = isGFX11Plus
}
} // End SubtargetPredicate = HasSdstCMPX
+} // End ReadsModeReg = 0, mayRaiseFPException = 0
defm VOPC_I1_F16_I16 : VOPC_Class_Profile_t16<[Write32Bit]>;
def VOPC_I1_F32_I32 : VOPC_Class_Profile<[Write32Bit], f32>;
@@ -1002,12 +1104,14 @@ def VOPC_F64_I32 : VOPC_Class_NoSdst_Profile<[Write64Bit], f64>;
multiclass VOPC_CLASS_F16 <string opName> {
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
defm NAME : VOPC_Class_Pseudos <opName, VOPC_I1_F16_I16, 0>;
+ defm : VOPCClassPat64<NAME>;
}
- let OtherPredicates = [UseRealTrue16Insts] in {
+ let True16Predicate = UseRealTrue16Insts in {
defm _t16 : VOPC_Class_Pseudos <opName#"_t16", VOPC_I1_F16_I16_t16, 0>;
}
- let OtherPredicates = [UseFakeTrue16Insts] in {
+ let True16Predicate = UseFakeTrue16Insts in {
defm _fake16 : VOPC_Class_Pseudos <opName#"_fake16", VOPC_I1_F16_I16_fake16, 0>;
+ defm : VOPCClassPat64_fake16<NAME>;
}
}
@@ -1015,37 +1119,31 @@ multiclass VOPCX_CLASS_F16 <string opName> {
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
defm NAME : VOPCX_Class_Pseudos <opName, VOPC_I1_F16_I16, VOPC_F16_I16>;
}
- let OtherPredicates = [UseRealTrue16Insts] in {
+ let True16Predicate = UseRealTrue16Insts in {
defm _t16 : VOPCX_Class_Pseudos <opName#"_t16", VOPC_I1_F16_I16_t16, VOPC_F16_I16_t16>;
}
- let OtherPredicates = [UseFakeTrue16Insts] in {
+ let True16Predicate = UseFakeTrue16Insts in {
defm _fake16 : VOPCX_Class_Pseudos <opName#"_fake16", VOPC_I1_F16_I16_fake16, VOPC_F16_I16_fake16>;
}
}
-multiclass VOPC_CLASS_F32 <string opName> :
- VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 0>;
-
-multiclass VOPCX_CLASS_F32 <string opName> :
- VOPCX_Class_Pseudos <opName, VOPC_I1_F32_I32, VOPC_F32_I32>;
-
-multiclass VOPC_CLASS_F64 <string opName> :
- VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 0>;
+multiclass VOPC_CLASS_F32 <string opName> {
+ defm NAME : VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 0>;
+ defm : VOPCClassPat64<NAME>;
+}
-multiclass VOPCX_CLASS_F64 <string opName> :
- VOPCX_Class_Pseudos <opName, VOPC_I1_F64_I32, VOPC_F64_I32>;
+multiclass VOPC_CLASS_F64 <string opName> {
+ defm NAME : VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 0>;
+ defm : VOPCClassPat64<NAME>;
+}
-// cmp_class ignores the FP mode and faithfully reports the unmodified
-// source value.
-let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">;
-defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">;
+defm V_CMPX_CLASS_F32 : VOPCX_Class_Pseudos <"v_cmpx_class_f32", VOPC_I1_F32_I32, VOPC_F32_I32>;
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">;
-defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">;
+defm V_CMPX_CLASS_F64 : VOPCX_Class_Pseudos <"v_cmpx_class_f64", VOPC_I1_F64_I32, VOPC_F64_I32>;
defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">;
defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
-} // End ReadsModeReg = 0, mayRaiseFPException = 0
//===----------------------------------------------------------------------===//
// V_ICMPIntrinsic Pattern.
@@ -1096,20 +1194,20 @@ defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
-let OtherPredicates = [HasTrue16BitInsts] in {
-defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_t16_e64, i16>;
-} // End OtherPredicates = [HasTrue16BitInsts]
-
-let OtherPredicates = [NotHasTrue16BitInsts] in {
+let True16Predicate = UseFakeTrue16Insts in {
+defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_fake16_e64, i16>;
+} // End OtherPredicates = UseFakeTrue16Insts
+
+let True16Predicate = NotHasTrue16BitInsts in {
defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>;
defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>;
defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>;
@@ -1120,7 +1218,7 @@ defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>;
defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>;
defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
-} // End OtherPredicates = [NotHasTrue16BitInsts]
+} // End True16Predicate = NotHasTrue16BitInsts
multiclass FCMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
let WaveSizePredicate = isWave64 in
@@ -1181,25 +1279,25 @@ defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
-let OtherPredicates = [HasTrue16BitInsts] in {
-defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_t16_e64, f16>;
-
-defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
-} // End OtherPredicates = [HasTrue16BitInsts]
-
-let OtherPredicates = [NotHasTrue16BitInsts] in {
+let True16Predicate = UseFakeTrue16Insts in {
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_fake16_e64, f16>;
+
+defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_fake16_e64, f16>;
+} // End True16Predicate = UseFakeTrue16Insts
+
+let True16Predicate = NotHasTrue16BitInsts in {
defm : FCMP_Pattern <COND_O, V_CMP_O_F16_e64, f16>;
defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_e64, f16>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
@@ -1215,7 +1313,7 @@ defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>;
defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>;
defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;
-} // End OtherPredicates = [NotHasTrue16BitInsts]
+} // End True16Predicate = NotHasTrue16BitInsts
//===----------------------------------------------------------------------===//
// DPP Encodings
@@ -1283,11 +1381,12 @@ class VOPC_DPP16<bits<8> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
: VOPC_DPP_Base<op, opName, ps.Pfl> {
let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
+ let True16Predicate = ps.True16Predicate;
+ let OtherPredicates = ps.OtherPredicates;
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
- let OtherPredicates = ps.OtherPredicates;
let Constraints = ps.Constraints;
}
@@ -1303,93 +1402,96 @@ class VOPC_DPP8<bits<8> op, VOPC_Pseudo ps, string opName = ps.OpName>
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
let OtherPredicates = ps.OtherPredicates;
+ let True16Predicate = ps.True16Predicate;
let Constraints = "";
}
// VOPC64
-class VOPC64_DPP_Base<bits<10> op, string OpName, VOPProfile P>
- : VOP3_DPP_Base<OpName, P, 1>, VOP3_DPPe_Common<op, P> {
+class VOPC64_DPP<VOP_DPP_Pseudo ps, string opName = ps.OpName>
+ : VOP3_DPP_Base<opName, ps.Pfl, 1> {
Instruction Opcode = !cast<Instruction>(NAME);
-
- bits<8> src0;
- bits<9> dpp_ctrl;
- bits<1> bound_ctrl;
- bits<4> bank_mask;
- bits<4> row_mask;
- bit fi;
-
- let Inst{40-32} = 0xfa;
- let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
- let Inst{80-72} = dpp_ctrl;
- let Inst{82} = fi;
- let Inst{83} = bound_ctrl;
- // Inst{87-84} ignored by hw
- let Inst{91-88} = bank_mask;
- let Inst{95-92} = row_mask;
-}
-
-class VOPC64_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
- : VOPC64_DPP_Base<op, opName, ps.Pfl> {
let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
+ let True16Predicate = ps.True16Predicate;
+ let OtherPredicates = ps.OtherPredicates;
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
- let OtherPredicates = ps.OtherPredicates;
let Constraints = ps.Constraints;
}
class VOPC64_DPP16_Dst<bits<10> op, VOP_DPP_Pseudo ps,
string opName = ps.OpName>
- : VOPC64_DPP16<op, ps, opName> {
+ : VOPC64_DPP<ps, opName>, VOP3_DPP_Enc<op, ps.Pfl, 1> {
+
bits<8> sdst;
let Inst{7-0} = sdst;
}
class VOPC64_DPP16_NoDst<bits<10> op, VOP_DPP_Pseudo ps,
string opName = ps.OpName>
- : VOPC64_DPP16<op, ps, opName> {
+ : VOPC64_DPP<ps, opName>, VOP3_DPP_Enc<op, ps.Pfl, 1> {
+
let Inst{7-0} = ? ;
}
-class VOPC64_DPP8_Base<bits<10> op, string OpName, VOPProfile P>
- : VOP3_DPP8_Base<OpName, P>, VOP3_DPPe_Common<op, P> {
- Instruction Opcode = !cast<Instruction>(NAME);
-
- bits<8> src0;
- bits<24> dpp8;
- bits<9> fi;
+class VOPC64_DPP16_Dst_t16<bits<10> op, VOP_DPP_Pseudo ps,
+ string opName = ps.OpName>
+ : VOPC64_DPP<ps, opName>, VOP3_DPP_Enc_t16<op, ps.Pfl, 1> {
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ let Inst{14} = 0;
+}
- let Inst{40-32} = fi;
- let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
- let Inst{95-72} = dpp8{23-0};
+class VOPC64_DPP16_NoDst_t16<bits<10> op, VOP_DPP_Pseudo ps,
+ string opName = ps.OpName>
+ : VOPC64_DPP<ps, opName>, VOP3_DPP_Enc_t16<op, ps.Pfl, 1> {
+ let Inst{7-0} = ? ;
+ let Inst{14} = 0;
}
-class VOPC64_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
- : VOPC64_DPP8_Base<op, opName, ps.Pfl> {
+class VOPC64_DPP8<VOP_Pseudo ps, string opName = ps.OpName>
+ : VOP3_DPP8_Base<opName, ps.Pfl> {
+ Instruction Opcode = !cast<Instruction>(NAME);
// Note ps is the non-dpp pseudo
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
let OtherPredicates = ps.OtherPredicates;
+ let True16Predicate = ps.True16Predicate;
}
class VOPC64_DPP8_Dst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
- : VOPC64_DPP8<op, ps, opName> {
+ : VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc<op, ps.Pfl> {
bits<8> sdst;
let Inst{7-0} = sdst;
let Constraints = "";
}
class VOPC64_DPP8_NoDst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
- : VOPC64_DPP8<op, ps, opName> {
+ : VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc<op, ps.Pfl> {
let Inst{7-0} = ? ;
let Constraints = "";
}
+class VOPC64_DPP8_Dst_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
+ : VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc_t16<op, ps.Pfl> {
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ let Inst{14} = 0;
+ let Constraints = "";
+}
+
+class VOPC64_DPP8_NoDst_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
+ : VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc_t16<op, ps.Pfl> {
+ let Inst{7-0} = ? ;
+ let Inst{14} = 0;
+ let Constraints = "";
+}
+
//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
@@ -1404,8 +1506,8 @@ multiclass VOPC_Real_Base<GFXGen Gen, bits<9> op> {
defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
def _e32#Gen.Suffix : VOPC_Real<ps32, Gen.Subtarget>,
VOPCe<op{7-0}>;
- def _e64#Gen.Suffix : VOP3_Real<ps64, Gen.Subtarget>,
- VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
+ def _e64#Gen.Suffix : VOP3_Real_Gen<ps64, Gen>,
+ VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
// Encoding used for VOPC instructions encoded as VOP3 differs from
// VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
bits<8> sdst;
@@ -1470,13 +1572,25 @@ multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
// the destination-less 32bit forms add it to the asmString here.
VOPC_Real<ps32, Gen.Subtarget, asm_name#"_e32">,
VOPCe<op{7-0}>;
- def _e64#Gen.Suffix :
- VOP3_Real_Gen<ps64, Gen, asm_name>,
- VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
- // Encoding used for VOPC instructions encoded as VOP3 differs from
- // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
- bits<8> sdst;
- let Inst{7-0} = sdst;
+ if ps64.Pfl.IsRealTrue16 then {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps64, Gen, asm_name>,
+ VOP3e_t16_gfx11_gfx12<{0, op}, ps64.Pfl> {
+ // Encoding used for VOPC instructions encoded as VOP3 differs from
+ // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ let Inst{14} = 0;
+ }
+ } else {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps64, Gen, asm_name>,
+ VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl>{
+ // Encoding used for VOPC instructions encoded as VOP3 differs from
+ // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ }
}
defm : VOPCInstAliases<OpName, !substr(Gen.Suffix, 1), NAME, asm_name>;
@@ -1516,9 +1630,16 @@ multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e64" #"_dpp");
- def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
- SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
- def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
+ if ps64.Pfl.IsRealTrue16 then {
+ def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst_t16<{0, op}, psDPP, asm_name>,
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst_t16<{0, op}, ps64, asm_name>;
+ } else {
+ def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
+ } // end if ps64.Pfl.IsRealTrue16
+
} // end if ps64.Pfl.HasExtVOP3DPP
} // End DecoderNamespace
} // End AssemblerPredicate
@@ -1539,7 +1660,7 @@ multiclass VOPCX_Real<GFXGen Gen, bits<9> op> {
# " " # ps32.AsmOperands;
}
def _e64#Gen.Suffix :
- VOP3_Real<ps64, Gen.Subtarget>,
+ VOP3_Real_Gen<ps64, Gen>,
VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
let Inst{7-0} = ?; // sdst
let AsmString = !subst("_nosdst", "", ps64.Mnemonic)
@@ -1597,11 +1718,21 @@ multiclass VOPCX_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
VOPCe<op{7-0}> {
let AsmString = asm_name # "{_e32} " # ps32.AsmOperands;
}
- def _e64#Gen.Suffix
- : VOP3_Real_Gen<ps64, Gen, asm_name>,
- VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
- let Inst{7-0} = ? ; // sdst
- let AsmString = asm_name # "{_e64} " # ps64.AsmOperands;
+ if ps64.Pfl.IsRealTrue16 then {
+ def _e64#Gen.Suffix
+ : VOP3_Real_Gen<ps64, Gen, asm_name>,
+ VOP3e_t16_gfx11_gfx12<{0, op}, ps64.Pfl> {
+ let Inst{7-0} = ? ; // sdst
+ let Inst{14} = 0;
+ let AsmString = asm_name # "{_e64} " # ps64.AsmOperands;
+ }
+ } else {
+ def _e64#Gen.Suffix
+ : VOP3_Real_Gen<ps64, Gen, asm_name>,
+ VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
+ let Inst{7-0} = ? ; // sdst
+ let AsmString = asm_name # "{_e64} " # ps64.AsmOperands;
+ }
}
defm : VOPCXInstAliases<OpName, !substr(Gen.Suffix, 1), NAME, asm_name>;
@@ -1615,14 +1746,25 @@ multiclass VOPCX_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName#"_nosdst_e64"#"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
- def _e64_dpp#Gen.Suffix
- : VOPC64_DPP16_NoDst<{0, op}, psDPP, asm_name>,
- SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
- let AsmString = asm_name # "{_e64_dpp} " # AsmDPP;
- }
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
- def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> {
- let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8;
+ if ps64.Pfl.IsRealTrue16 then {
+ def _e64_dpp#Gen.Suffix
+ : VOPC64_DPP16_NoDst_t16<{0, op}, psDPP, asm_name>,
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
+ let AsmString = asm_name # "{_e64_dpp} " # AsmDPP;
+ }
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst_t16<{0, op}, ps64, asm_name> {
+ let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8;
+ }
+ } else {
+ def _e64_dpp#Gen.Suffix
+ : VOPC64_DPP16_NoDst<{0, op}, psDPP, asm_name>,
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
+ let AsmString = asm_name # "{_e64_dpp} " # AsmDPP;
+ }
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> {
+ let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8;
+ }
}
} // End if ps64.Pfl.HasExtVOP3DPP
} // End DecoderNamespace
@@ -1655,22 +1797,46 @@ multiclass VOPC_Real_t16_gfx11<bits <9> op, string asm_name,
string OpName = NAME, string pseudo_mnemonic = ""> :
VOPC_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>;
+multiclass VOPC_Real_t16_and_fake16_gfx11<bits <9> op, string asm_name,
+ string OpName = NAME, string pseudo_mnemonic = ""> {
+ defm OpName#"_t16":VOPC_Real_t16_gfx11<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
+ defm OpName#"_fake16":VOPC_Real_t16_gfx11<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
+}
+
multiclass VOPC_Real_t16_gfx11_gfx12<bits <9> op, string asm_name,
string OpName = NAME, string pseudo_mnemonic = ""> :
VOPC_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>,
VOPC_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
+multiclass VOPC_Real_t16_and_fake16_gfx11_gfx12<bits <9> op, string asm_name,
+ string OpName = NAME, string pseudo_mnemonic = ""> {
+ defm OpName#"_t16":VOPC_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
+ defm OpName#"_fake16":VOPC_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
+}
+
multiclass VOPCX_Real_t16_gfx11<bits<9> op, string asm_name,
string OpName = NAME, string pseudo_mnemonic = ""> :
VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>;
+multiclass VOPCX_Real_t16_and_fake16_gfx11<bits <9> op, string asm_name,
+ string OpName = NAME, string pseudo_mnemonic = ""> {
+ defm OpName#"_t16":VOPCX_Real_t16_gfx11<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
+ defm OpName#"_fake16":VOPCX_Real_t16_gfx11<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
+}
+
multiclass VOPCX_Real_t16_gfx11_gfx12<bits<9> op, string asm_name,
string OpName = NAME, string pseudo_mnemonic = ""> :
VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>,
VOPCX_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
-defm V_CMP_F_F16_t16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
-defm V_CMP_LT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
+multiclass VOPCX_Real_t16_and_fake16_gfx11_gfx12<bits <9> op, string asm_name,
+ string OpName = NAME, string pseudo_mnemonic = ""> {
+ defm OpName#"_t16":VOPCX_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
+ defm OpName#"_fake16":VOPCX_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
+}
+
+defm V_CMP_F_F16 : VOPC_Real_t16_and_fake16_gfx11<0x000, "v_cmp_f_f16">;
+defm V_CMP_LT_F16 : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
defm V_CMP_EQ_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
defm V_CMP_LE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x003, "v_cmp_le_f16">;
defm V_CMP_GT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x004, "v_cmp_gt_f16">;
@@ -1686,8 +1852,6 @@ defm V_CMP_NEQ_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00d, "v_cmp_neq_f16">;
defm V_CMP_NLT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00e, "v_cmp_nlt_f16">;
defm V_CMP_T_F16_t16 : VOPC_Real_t16_gfx11<0x00f, "v_cmp_t_f16", "V_CMP_TRU_F16_t16", "v_cmp_tru_f16">;
-defm V_CMP_F_F16_fake16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
-defm V_CMP_LT_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
defm V_CMP_EQ_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
defm V_CMP_LE_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x003, "v_cmp_le_f16">;
defm V_CMP_GT_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x004, "v_cmp_gt_f16">;
@@ -1781,12 +1945,11 @@ defm V_CMP_NE_U64 : VOPC_Real_gfx11_gfx12<0x05d>;
defm V_CMP_GE_U64 : VOPC_Real_gfx11_gfx12<0x05e>;
defm V_CMP_T_U64 : VOPC_Real_gfx11<0x05f>;
-defm V_CMP_CLASS_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
-defm V_CMP_CLASS_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
+defm V_CMP_CLASS_F16 : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
defm V_CMP_CLASS_F32 : VOPC_Real_gfx11_gfx12<0x07e>;
defm V_CMP_CLASS_F64 : VOPC_Real_gfx11_gfx12<0x07f>;
-defm V_CMPX_F_F16_t16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
+defm V_CMPX_F_F16 : VOPCX_Real_t16_and_fake16_gfx11<0x080, "v_cmpx_f_f16">;
defm V_CMPX_LT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
defm V_CMPX_EQ_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
defm V_CMPX_LE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x083, "v_cmpx_le_f16">;
@@ -1803,7 +1966,6 @@ defm V_CMPX_NEQ_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08d, "v_cmpx_neq_f16">;
defm V_CMPX_NLT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08e, "v_cmpx_nlt_f16">;
defm V_CMPX_T_F16_t16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16_t16", "v_cmpx_t_f16", "v_cmpx_tru_f16">;
-defm V_CMPX_F_F16_fake16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
defm V_CMPX_LT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
defm V_CMPX_EQ_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
defm V_CMPX_LE_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x083, "v_cmpx_le_f16">;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 0c21ef29cc3c74..0d7759d488ed91 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1875,15 +1875,16 @@ include "VOPDInstructions.td"
class ClassPat<Instruction inst, ValueType vt> : GCNPat <
(is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
- (inst i32:$src0_mods, vt:$src0, (V_MOV_B32_e32 timm:$mask))
+ (inst i32:$src0_mods, vt:$src0, SRCMODS.NONE, (V_MOV_B32_e32 timm:$mask))
>;
def : ClassPat<V_CMP_CLASS_F16_e64, f16> {
- let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts];
+ let OtherPredicates = [Has16BitInsts];
+ let True16Predicate = NotHasTrue16BitInsts;
}
-def : ClassPat<V_CMP_CLASS_F16_t16_e64, f16> {
- let OtherPredicates = [HasTrue16BitInsts];
+def : ClassPat<V_CMP_CLASS_F16_fake16_e64, f16> {
+ let True16Predicate = UseFakeTrue16Insts;
}
def : ClassPat<V_CMP_CLASS_F32_e64, f32>;
@@ -1903,7 +1904,7 @@ def VOP2InfoTable : VOPInfoTable<"VOP2">;
def VOP3InfoTable : VOPInfoTable<"VOP3">;
class VOPC64Table <string Format> : GenericTable {
- let FilterClass = "VOPC64_" # Format # "_Base";
+ let FilterClass = "VOPC64_" # Format;
let CppTypeName = "VOPC64DPPInfo";
let Fields = ["Opcode"];
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir
index 46c801b5738e11..41d9647b4a23e4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir
@@ -16,14 +16,15 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
+ ;
; WAVE32-LABEL: name: class_s32_vcc_sv
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
@@ -45,14 +46,15 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
+ ;
; WAVE32-LABEL: name: class_s32_vcc_vs
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
@@ -74,14 +76,15 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
+ ;
; WAVE32-LABEL: name: class_s32_vcc_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -103,14 +106,15 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
+ ;
; WAVE32-LABEL: name: class_s64_vcc_sv
; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@@ -133,14 +137,15 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
+ ;
; WAVE32-LABEL: name: class_s64_vcc_vs
; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:sgpr(s32) = COPY $sgpr0
@@ -163,14 +168,15 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
+ ;
; WAVE32-LABEL: name: class_s64_vcc_vv
; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir
index 3ca3928fbfad30..c1f334c5ec043a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir
@@ -24,14 +24,15 @@ body: |
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
+ ;
; WAVE64-LABEL: name: class_s16_vcc_sv
; WAVE64: liveins: $sgpr0, $vgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
@@ -54,14 +55,15 @@ body: |
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
+ ;
; WAVE64-LABEL: name: class_s16_vcc_vs
; WAVE64: liveins: $sgpr0, $vgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
@@ -84,14 +86,15 @@ body: |
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
+ ;
; WAVE64-LABEL: name: class_s16_vcc_vv
; WAVE64: liveins: $vgpr0, $vgpr1
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
index 17cdab46c3b936..3cd13f96c8020f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
@@ -17,8 +17,8 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
+ ; CHECK-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
@@ -43,8 +43,8 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
+ ; CHECK-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
index 158076a3b74a2a..9c0a02c9377e12 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
@@ -17,8 +17,8 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
+ ; CHECK-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
@@ -43,8 +43,8 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
+ ; CHECK-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
index ecb07f79e9fd19..771c4e9ec36474 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
@@ -208,7 +208,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; GCN-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; GCN-NEXT: $vcc = COPY [[V_CMP_CLASS_F32_e64_]]
; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
; GCN-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
index e994f42e0d60d6..d45bc31a127291 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
@@ -20,6 +20,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_eq_s16_sv
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32-NEXT: {{ $}}
@@ -27,13 +28,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_eq_s16_sv
; GFX11: liveins: $sgpr0, $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s16) = G_TRUNC %0
@@ -59,6 +61,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_eq_s16_vs
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32-NEXT: {{ $}}
@@ -66,13 +69,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_eq_s16_vs
; GFX11: liveins: $sgpr0, $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s16) = G_TRUNC %0
@@ -98,6 +102,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_eq_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -105,13 +110,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_eq_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -137,6 +143,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_ne_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -144,13 +151,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_ne_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -176,6 +184,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_slt_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -183,13 +192,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_slt_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -215,6 +225,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_sle_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -222,13 +233,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_sle_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -254,6 +266,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_ult_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -261,13 +274,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_ult_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -293,6 +307,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_ule_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -300,13 +315,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_ule_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
index 265bdd0cf2f48f..a22226b6d0f1be 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
@@ -1,6 +1,26 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
+---
+name: cmp_f16
+body: |
+ bb.0.entry:
+ ; GCN-LABEL: name: cmp_f16
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CVT_F16_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_fake16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CMP_LT_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_fake16_e64 0, [[V_CVT_F16_U16_fake16_e64_]], 0, [[DEF1]], 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed [[V_CMP_LT_F16_fake16_e64_]], implicit $exec
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:vgpr_32 = V_CVT_F16_U16_fake16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %3:sreg_32 = COPY %2:vgpr_32
+ nofpexcept S_CMP_LT_F16 killed %3:sreg_32, %1:sreg_32, implicit-def $scc, implicit $mode
+ %4:sreg_32_xm0_xexec = COPY $scc
+ %5:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %4, implicit $exec
+...
+
---
name: cvt_hi_f32_f16
body: |
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir
index 03a77dc2b8b5e2..471be11c3185b6 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir
@@ -1,7 +1,26 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
# XFAIL: *
-# FIXME-TRUE16. reenable after CVT_F16_U16_t16 is supported in CodeGen
+# FIXME-TRUE16 renable this test after fixed sgpr copy for true16
+---
+name: cmp_f16
+body: |
+ bb.0.entry:
+ ; GCN-LABEL: name: cmp_f16
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CVT_F16_U16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, [[V_CVT_F16_U16_e64_]], 0, [[DEF1]], 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed [[V_CMP_LT_F16_t16_e64_]], implicit $exec
+ %0:vgpr_16 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:vgpr_16 = V_CVT_F16_U16_t16_e64 0, %0:vgpr_16, 0, 0, 0, implicit $mode, implicit $exec
+ %3:sreg_32 = COPY %2:vgpr_16
+ nofpexcept S_CMP_LT_F16 killed %3:sreg_32, %1:sreg_32, implicit-def $scc, implicit $mode
+ %4:sreg_32_xm0_xexec = COPY $scc
+ %5:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %4, implicit $exec
+...
---
name: cvt_hi_f32_f16
@@ -14,7 +33,7 @@ body: |
; GCN-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_CVT_F16_U16_e64_]], implicit $exec
; GCN-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_t16_e64 0, [[V_LSHRREV_B32_e64_]], 0, 0, implicit $mode, implicit $exec
%0:vgpr_16 = IMPLICIT_DEF
- %1:vgpr_16 = V_CVT_F16_U16_t16_e64 %0:vgpr_16, 0, 0, 0, implicit $mode, implicit $exec
+ %1:vgpr_16 = V_CVT_F16_U16_t16_e64 0, %0:vgpr_16, 0, 0, 0, implicit $mode, implicit $exec
%2:sreg_32 = COPY %1:vgpr_16
%3:sreg_32 = S_CVT_HI_F32_F16 %2:sreg_32, implicit $mode
...
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
index 9a727a321d7869..e8291f7ab8f729 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
@@ -2,26 +2,6 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,REAL16 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,FAKE16 %s
----
-name: cmp_f16
-body: |
- bb.0.entry:
- ; GCN-LABEL: name: cmp_f16
- ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[V_CVT_F16_U16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, [[V_CVT_F16_U16_e64_]], 0, [[DEF1]], 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed [[V_CMP_LT_F16_t16_e64_]], implicit $exec
- %0:vgpr_32 = IMPLICIT_DEF
- %1:sreg_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_CVT_F16_U16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
- %3:sreg_32 = COPY %2:vgpr_32
- nofpexcept S_CMP_LT_F16 killed %3:sreg_32, %1:sreg_32, implicit-def $scc, implicit $mode
- %4:sreg_32_xm0_xexec = COPY $scc
- %5:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %4, implicit $exec
-...
-
---
name: fmac_f16
body: |
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-true16.mir b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir
index be759049bc3a7d..245c5e1005d08f 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-true16.mir
+++ b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir
@@ -11,8 +11,8 @@ body: |
; GFX1100-LABEL: name: 16bit_lo128_shrink
; GFX1100: liveins: $vgpr127
; GFX1100-NEXT: {{ $}}
- ; GFX1100-NEXT: V_CMP_EQ_U16_t16_e32 0, $vgpr127, implicit-def $vcc_lo, implicit $exec, implicit $exec
- $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr127, implicit-def $vcc, implicit $exec
+ ; GFX1100-NEXT: V_CMP_EQ_U16_fake16_e32 0, $vgpr127, implicit-def $vcc_lo, implicit $exec, implicit $exec
+ $vcc_lo = V_CMP_EQ_U16_fake16_e64 0, $vgpr127, implicit-def $vcc, implicit $exec
...
---
@@ -24,6 +24,6 @@ body: |
; GFX1100-LABEL: name: 16bit_lo128_no_shrink
; GFX1100: liveins: $vgpr128
; GFX1100-NEXT: {{ $}}
- ; GFX1100-NEXT: $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc_lo, implicit $exec
- $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc, implicit $exec
+ ; GFX1100-NEXT: $vcc_lo = V_CMP_EQ_U16_fake16_e64 0, $vgpr128, implicit-def $vcc_lo, implicit $exec
+ $vcc_lo = V_CMP_EQ_U16_fake16_e64 0, $vgpr128, implicit-def $vcc, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index 123893674ff5e9..7521722e33b956 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -18,15 +18,15 @@ body: |
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc_lo, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: [[V_CMP_GE_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_t16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+ ; GCN-NEXT: V_CMPX_EQ_I16_fake16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CMP_CLASS_F16_fake16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_fake16_e64_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_CMP_GE_F16_fake16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_fake16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec
- ; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
+ ; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, [[V_CMP_NGE_F16_t16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CMP_NGE_F16_fake16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_fake16_e64 0, [[V_CMP_NGE_F16_fake16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc
; GCN-NEXT: V_CMP_GT_I32_e32_dpp [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
@@ -40,20 +40,20 @@ body: |
; unsafe to combine cmpx
%5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- V_CMPX_EQ_I16_t16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
+ V_CMPX_EQ_I16_fake16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
%6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %7:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %6, %0, implicit-def $vcc, implicit $mode, implicit $exec
+ %7:sgpr_32 = V_CMP_CLASS_F16_fake16_e64 0, %6, 0, %0, implicit-def $vcc, implicit $mode, implicit $exec
%8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %9:sgpr_32 = V_CMP_GE_F16_t16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec
+ %9:sgpr_32 = V_CMP_GE_F16_fake16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec
; unsafe to combine cmpx
%10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
V_CMPX_GT_U32_nosdst_e64 %10, %0, implicit-def $exec, implicit $mode, implicit $exec
%11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, %0, implicit $mode, implicit $exec
+ %12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, 0, %0, implicit $mode, implicit $exec
; shrink
%13:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
@@ -61,7 +61,7 @@ body: |
; do not shrink True16 instructions
%15:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %16:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec
+ %16:sgpr_32 = V_CMP_NGE_F16_fake16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec
; do not shrink, sdst used
%17:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
@@ -89,7 +89,7 @@ body: |
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
- ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CMP_CLASS_F16_fake16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_fake16_e64 0, [[V_MOV_B32_dpp]], 0, [[COPY]], implicit-def $vcc_lo, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
%0:vgpr_32 = COPY $vgpr0
@@ -100,7 +100,7 @@ body: |
; Do not combine VOPC when row_mask or bank_mask is not 0xf
; All cases are covered by generic rules for creating DPP instructions
%4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
- %99:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
+ %99:sgpr_32 = V_CMP_CLASS_F16_fake16_e64 0, %4, 0, %0, implicit-def $vcc, implicit $mode, implicit $exec
%5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
%6:sgpr_32 = V_CMP_GE_F32_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
More information about the llvm-commits
mailing list