[llvm] [AMDGPU][True16][MC] VOP3 profile in True16 format (PR #109031)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 8 07:08:58 PDT 2024
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/109031
From 91af38ab769e44b9ce02dee60fe37ce19309fce7 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Tue, 17 Sep 2024 15:06:04 -0400
Subject: [PATCH 1/3] [AMDGPU][True16][MC] VOP3 profile in True16 format
---
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 6 +-
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 4 +-
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 90 ++--
llvm/lib/Target/AMDGPU/VOPInstructions.td | 308 +++++++++--
.../AMDGPU/GlobalISel/inst-select-add.s16.mir | 40 +-
.../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 32 +-
.../isel-amdgpu-cs-chain-preserve-cc.ll | 6 +-
llvm/test/MC/AMDGPU/gfx11_asm_opsel.s | 48 ++
llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 260 ++++++----
llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s | 240 ++++-----
llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 172 +++---
llvm/test/MC/AMDGPU/gfx12_asm_opsel.s | 48 ++
llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 156 +++---
llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s | 328 +++++++-----
llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 176 ++++---
.../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 444 ++++++++++++----
.../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 376 ++++++++++++--
.../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 196 ++++++-
.../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 384 +++++++++++---
.../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 488 ++++++++++++++++--
.../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 308 ++++++++++-
21 files changed, 3143 insertions(+), 967 deletions(-)
create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_opsel.s
create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_opsel.s
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 087ca1f954464d..be42d3595bea61 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2237,8 +2237,9 @@ class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
string clamp = !if(HasClamp, "$clamp", "");
string omod = !if(HasOMod, "$omod", "");
- string ret = dst#!if(!gt(NumSrcArgs,0),", "#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod, "");
-
+ string ret = dst#!if(!eq(NumSrcArgs,0),
+ "",
+ !if(HasDst,", ", "")#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod);
}
class getAsmVOP3DPP<string base> {
@@ -2733,6 +2734,7 @@ def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
+def VOP_I32_I32_I32_I16 : VOPProfile <[i32, i32, i32, i16]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index cdc32149249610..f411ea277bbe23 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1661,8 +1661,8 @@ multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;
multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> :
- VOP3Only_Realtriple_t16<GFX11Gen, op, asmName, OpName>,
- VOP3Only_Realtriple_t16<GFX12Gen, op, asmName, OpName>;
+ VOP3_Realtriple_t16_gfx11<op, asmName, OpName, "", 1>,
+ VOP3_Realtriple_t16_gfx12<op, asmName, OpName, "", 1>;
multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> {
defm OpName#"_t16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 78ca7a2f258cb3..fe37b56bcbf862 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -569,16 +569,10 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
getAsmVOP3OpSel<3, HasClamp, HasOMod,
HasSrc0FloatMods, HasSrc1FloatMods,
HasSrc2FloatMods>.ret);
- let AsmVOP3DPP16 = !subst(", $src2_modifiers", "",
- getAsmVOP3DPP16<getAsmVOP3Base<3, 1, HasClamp, 1,
- HasOMod, 0, 1, HasSrc0FloatMods,
- HasSrc1FloatMods,
- HasSrc2FloatMods>.ret>.ret);
- let AsmVOP3DPP8 = !subst(", $src2_modifiers", "",
- getAsmVOP3DPP8<getAsmVOP3Base<3, 1, HasClamp, 1,
- HasOMod, 0, 1, HasSrc0FloatMods,
- HasSrc1FloatMods,
- HasSrc2FloatMods>.ret>.ret);
+ let AsmVOP3Base = !subst(", $src2_modifiers", "",
+ getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
+ HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, 0/*Src1Mods*/,
+ HasModifiers, DstVT>.ret);
}
class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
@@ -636,8 +630,8 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
-defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
-defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
+defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
+defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
@@ -752,6 +746,8 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
(i32 (EXTRACT_SUBREG $src1, sub1)),
(i32 (EXTRACT_SUBREG $src2, sub1))), sub1)>;
+} // End SubtargetPredicate = isGFX9Plus
+
// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
class OpSelBinOpClampPat<SDPatternOperator node,
Instruction inst> : GCNPat<
@@ -760,9 +756,14 @@ class OpSelBinOpClampPat<SDPatternOperator node,
(inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE, 0)
>;
-def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
-def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
-} // End SubtargetPredicate = isGFX9Plus
+let OtherPredicates = [isGFX9Plus], True16Predicate = NotHasTrue16BitInsts in {
+ def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
+ def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
+} // End OtherPredicates = [isGFX9Plus], True16Predicate = NotHasTrue16BitInsts
+let True16Predicate = UseFakeTrue16Insts in {
+ def : OpSelBinOpClampPat<saddsat, V_ADD_I16_fake16_e64>;
+ def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_fake16_e64>;
+} // End True16Predicate = UseFakeTrue16Insts
multiclass IMAD32_Pats <VOP3_Pseudo inst> {
def : GCNPat <
@@ -871,21 +872,31 @@ let SubtargetPredicate = isGFX10Plus in {
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
}
- defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
- defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
-
- def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
- def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
-
- // Undo sub x, c -> add x, -c canonicalization since c is more likely
- // an inline immediate than -c.
- def : GCNPat<
- (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
- (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
- >;
+ defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>;
+ defm V_SUB_NC_U16 : VOP3Inst_t16 <"v_sub_nc_u16", VOP_I16_I16_I16, sub>;
} // End SubtargetPredicate = isGFX10Plus
+let True16Predicate = NotHasTrue16BitInsts, OtherPredicates = [isGFX10Plus] in {
+ def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
+ def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
+ // Undo sub x, c -> add x, -c canonicalization since c is more likely
+ // an inline immediate than -c.
+ def : GCNPat<
+ (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
+ (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
+ >;
+} // End True16Predicate = NotHasTrue16BitInsts, OtherPredicates = [isGFX10Plus]
+
+let True16Predicate = UseFakeTrue16Insts in {
+ def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_fake16_e64>;
+ def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_fake16_e64>;
+ def : GCNPat<
+ (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
+ (V_SUB_NC_U16_fake16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
+ >;
+} // End True16Predicate = UseFakeTrue16Insts
+
let SubtargetPredicate = isGFX12Plus in {
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
defm V_PERMLANE16_VAR_B32 : VOP3Inst<"v_permlane16_var_b32", VOP3_PERMLANE_VAR_Profile>;
@@ -1101,9 +1112,26 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
+multiclass VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string opName,
+ string asmName> {
+ defm opName#"_t16": VOP3_Realtriple_with_name_gfx11_gfx12<op, opName#"_t16", asmName>;
+ defm opName#"_fake16": VOP3_Realtriple_with_name_gfx11_gfx12<op, opName#"_fake16", asmName>;
+}
+
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
+multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
+ string pseudo_mnemonic = "", bit isSingle = 0> :
+ VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
+ VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
+
+multiclass VOP3_Realtriple_t16_and_f16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
+ string pseudo_mnemonic = "", bit isSingle = 0> {
+ defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
+ defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
+}
+
multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
VOP3be_Real<GFX11Gen, op, opName, asmName>,
VOP3be_Real<GFX12Gen, op, opName, asmName>;
@@ -1189,8 +1217,8 @@ defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
-defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x303>;
-defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x304>;
+defm V_ADD_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x303, "v_add_nc_u16">;
+defm V_SUB_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x304, "v_sub_nc_u16">;
defm V_MUL_LO_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x305, "v_mul_lo_u16">;
defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11_gfx12<0x306>;
defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11_gfx12<0x307>;
@@ -1198,8 +1226,8 @@ defm V_MAX_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30
defm V_MAX_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30a, "v_max_i16">;
defm V_MIN_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30b, "v_min_u16">;
defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">;
-defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30d, "V_ADD_I16", "v_add_nc_i16">;
-defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
+defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "V_ADD_I16", "v_add_nc_i16">;
+defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 05a7d907d237ae..0c21ef29cc3c74 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -111,7 +111,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
bit HasFP8DstByteSel = P.HasFP8DstByteSel;
- let AsmOperands = !if(isVop3OpSel,
+ let AsmOperands = !if(!and(!not(P.IsTrue16), isVop3OpSel),
P.AsmVOP3OpSel,
!if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64));
@@ -178,6 +178,7 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemoni
let SubtargetPredicate = ps.SubtargetPredicate;
let WaveSizePredicate = ps.WaveSizePredicate;
let OtherPredicates = ps.OtherPredicates;
+ let True16Predicate = ps.True16Predicate;
let AsmMatchConverter = ps.AsmMatchConverter;
let AsmVariantName = ps.AsmVariantName;
let Constraints = ps.Constraints;
@@ -242,6 +243,41 @@ class VOP3a<VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}
+// To avoid having a different version of every type of operand depending on
+// whether it is part of a True16 instruction, the operand encoding should be
+// the same for SGPR, imm, and VGPR_32 whether the instruction is True16 or not.
+class VOP3a_t16<VOPProfile P> : Enc64 {
+ bits<11> vdst;
+ bits<4> src0_modifiers;
+ bits<11> src0;
+ bits<3> src1_modifiers;
+ bits<11> src1;
+ bits<3> src2_modifiers;
+ bits<11> src2;
+ bits<1> clamp;
+ bits<2> omod;
+
+ let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0);
+ let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0);
+ let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);
+ // 16-bit select fields which can be interpreted as OpSel or hi/lo suffix
+ let Inst{11} = !if(P.HasSrc0Mods, src0_modifiers{2}, 0);
+ let Inst{12} = !if(P.HasSrc1Mods, src1_modifiers{2}, 0);
+ let Inst{13} = !if(P.HasSrc2Mods, src2_modifiers{2}, 0);
+ let Inst{14} = !if(!and(P.HasDst, P.HasSrc0Mods), src0_modifiers{3}, 0);
+ let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
+
+ let Inst{31-26} = 0x35;
+ let Inst{40-32} = !if(P.HasSrc0, src0{8-0}, 0);
+ let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2{8-0}, 0);
+ let Inst{60-59} = !if(P.HasOMod, omod, 0);
+ let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0);
+ let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0);
+ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
+}
+
class VOP3a_gfx6_gfx7<bits<9> op, VOPProfile p> : VOP3a<p> {
let Inst{11} = !if(p.HasClamp, clamp{0}, 0);
let Inst{25-17} = op;
@@ -272,6 +308,10 @@ class VOP3e_gfx10<bits<10> op, VOPProfile p> : VOP3a_gfx10<op, p> {
class VOP3e_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p>;
+class VOP3e_t16_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3a_t16<p> {
+ let Inst{25-16} = op;
+}
+
class VOP3e_vi <bits<10> op, VOPProfile P> : VOP3a_vi <op, P> {
bits<8> vdst;
let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
@@ -736,7 +776,12 @@ class VOP3_DPPe_Fields : VOP3_DPPe_Fields_Base {
bits<8> src0;
}
+class VOP3_DPPe_Fields_t16 : VOP3_DPPe_Fields_Base {
+ bits<11> src0;
+}
+
// Common refers to common between DPP and DPP8
+// Base refers to a shared base between T16 and regular instructions
class VOP3_DPPe_Common_Base<bits<10> op, VOPProfile P> : Enc96 {
bits<4> src0_modifiers;
bits<3> src1_modifiers;
@@ -748,7 +793,7 @@ class VOP3_DPPe_Common_Base<bits<10> op, VOPProfile P> : Enc96 {
let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0);
let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0);
let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);
- // OPSEL must be set such that the low result only uses low inputs, and the high result only uses high inputs.
+ // 16-bit select fields which can be interpreted as OpSel or hi/lo suffix
let Inst{11} = !if(P.HasOpSel, !if(P.HasSrc0Mods, src0_modifiers{2}, 0),
!if(P.IsFP8SrcByteSel, byte_sel{1}, ?));
let Inst{12} = !if(P.HasOpSel, !if(P.HasSrc1Mods, src1_modifiers{2}, 0),
@@ -777,6 +822,16 @@ class VOP3_DPPe_Common<bits<10> op, VOPProfile P> : VOP3_DPPe_Common_Base<op, P>
let Inst{58-50} = !if(P.HasSrc2, src2, 0);
}
+class VOP3_DPPe_Common_t16<bits<10> op, VOPProfile P> : VOP3_DPPe_Common_Base<op, P> {
+ bits<11> vdst;
+ bits<11> src1;
+ bits<11> src2;
+
+ let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2{8-0}, 0);
+}
+
class VOP3P_DPPe_Common_Base<bits<7> op, VOPProfile P> : Enc96 {
bits<4> src0_modifiers;
bits<4> src1_modifiers;
@@ -786,6 +841,7 @@ class VOP3P_DPPe_Common_Base<bits<7> op, VOPProfile P> : Enc96 {
let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
+ // OPSEL must be set such that the low result only uses low inputs, and the high result only uses high inputs.
let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2}, 0); // op_sel(0)
let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1)
let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2)
@@ -810,6 +866,16 @@ class VOP3P_DPPe_Common<bits<7> op, VOPProfile P> : VOP3P_DPPe_Common_Base<op, P
let Inst{58-50} = !if(P.HasSrc2, src2, 0);
}
+class VOP3P_DPPe_Common_t16<bits<7> op, VOPProfile P> : VOP3P_DPPe_Common_Base<op, P> {
+ bits<11> vdst;
+ bits<11> src1;
+ bits<11> src2;
+
+ let Inst{7-0} = vdst{7-0};
+ let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2{8-0}, 0);
+}
+
class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[],
dag Ins = P.InsDPP, string asmOps = P.AsmDPP> :
VOP_Pseudo<OpName, "_dpp", P, P.OutsDPP, Ins, asmOps, pattern> {
@@ -870,6 +936,7 @@ class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
// Copy relevant pseudo op flags
let isConvergent = ps.isConvergent;
let SubtargetPredicate = ps.SubtargetPredicate;
+ let True16Predicate = ps.True16Predicate;
let AssemblerPredicate = ps.AssemblerPredicate;
let OtherPredicates = ps.OtherPredicates;
let AsmMatchConverter = ps.AsmMatchConverter;
@@ -928,11 +995,29 @@ class VOP3_DPP_Base <string OpName, VOPProfile P, bit IsDPP16,
let Size = 12;
}
+class VOP3_DPP_Enc <bits<10> op, VOPProfile P, bit IsDPP16> :
+ VOP3_DPPe_Common<op, P>,
+ VOP3_DPPe_Fields {
+
+ let Inst{40-32} = 0xfa;
+ let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{80-72} = dpp_ctrl;
+ let Inst{82} = !if(IsDPP16, fi, ?);
+ let Inst{83} = bound_ctrl;
+
+ // Inst{87-84} ignored by hw
+ let Inst{91-88} = bank_mask;
+ let Inst{95-92} = row_mask;
+}
+
class VOP3_DPP <bits<10> op, string OpName, VOPProfile P, bit IsDPP16,
dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> :
- VOP3_DPP_Base<OpName, P, IsDPP16, InsDPP, AsmDPP>, VOP3_DPPe_Common<op, P>,
- VOP3_DPPe_Fields {
+ VOP3_DPP_Base<OpName, P, IsDPP16, InsDPP, AsmDPP>, VOP3_DPP_Enc<op, P, IsDPP16>;
+
+class VOP3_DPP_Enc_t16<bits<10> op, VOPProfile P, bit IsDPP16 >
+ : VOP3_DPPe_Common_t16<op, P>,
+ VOP3_DPPe_Fields_t16 {
let Inst{40-32} = 0xfa;
let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
@@ -945,6 +1030,13 @@ class VOP3_DPP <bits<10> op, string OpName, VOPProfile P, bit IsDPP16,
let Inst{95-92} = row_mask;
}
+class VOP3_DPP_t16<bits<10> op, string OpName, VOPProfile P, bit IsDPP16,
+ dag InsDPP = !if (IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
+ string AsmDPP = !if (IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)>
+ : VOP3_DPP_Base<OpName, P, IsDPP16, InsDPP, AsmDPP>,
+ VOP3_DPP_Enc_t16<op, P, IsDPP16> {
+}
+
class VOP3P_DPP <bits<7> op, string OpName, VOPProfile P, bit IsDPP16,
dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> :
@@ -979,6 +1071,12 @@ class VOP3_DPP8e_Fields {
bits<9> fi;
}
+class VOP3_DPP8e_Fields_t16 {
+ bits<11> src0;
+ bits<24> dpp8;
+ bits<9> fi;
+}
+
class VOP_DPP8_Base<string OpName, VOPProfile P, dag InsDPP8 = P.InsDPP8, string AsmDPP8 = P.AsmDPP8> :
InstSI<P.OutsDPP8, InsDPP8, OpName#AsmDPP8, []> {
@@ -1011,16 +1109,28 @@ class VOP3_DPP8_Base<string OpName, VOPProfile P> :
let Size = 12;
}
+class VOP3_DPP8_Enc <bits<10> op, VOPProfile P> :
+ VOP3_DPPe_Common<op, P>,
+ VOP3_DPP8e_Fields {
+ let Inst{40-32} = fi;
+ let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{95-72} = dpp8{23-0};
+}
class VOP3_DPP8<bits<10> op, string OpName, VOPProfile P> :
- VOP3_DPP8_Base<OpName, P>, VOP3_DPPe_Common<op, P>,
- VOP3_DPP8e_Fields {
+ VOP3_DPP8_Base<OpName, P>, VOP3_DPP8_Enc<op, P>;
+class VOP3_DPP8_Enc_t16 <bits<10> op, VOPProfile P> :
+ VOP3_DPPe_Common_t16<op, P>,
+ VOP3_DPP8e_Fields_t16 {
let Inst{40-32} = fi;
let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
let Inst{95-72} = dpp8{23-0};
}
+class VOP3_DPP8_t16<bits<10> op, string OpName, VOPProfile P> :
+ VOP3_DPP8_Base<OpName, P>, VOP3_DPP8_Enc_t16<op, P>;
+
class VOP3P_DPP8<bits<7> op, string OpName, VOPProfile P> :
VOP3_DPP8_Base<OpName, P>, VOP3P_DPPe_Common<op, P>,
VOP3_DPP8e_Fields {
@@ -1273,6 +1383,32 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Pr
}
+class VOP3_Profile_True16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile_True16<P> {
+
+ let HasClamp = !if(Features.HasClamp, 1, P.HasClamp);
+ let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel);
+ let IsMAI = !if(Features.IsMAI, 1, P.IsMAI);
+ let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
+
+ let HasModifiers =
+ !if (Features.IsMAI, 0,
+ !or(Features.IsPacked, Features.HasOpSel, P.HasModifiers));
+ let IsSingle = 1;
+}
+
+class VOP3_Profile_Fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile_Fake16<P> {
+
+ let HasClamp = !if(Features.HasClamp, 1, P.HasClamp);
+ let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel);
+ let IsMAI = !if(Features.IsMAI, 1, P.IsMAI);
+ let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
+
+ let HasModifiers =
+ !if (Features.IsMAI, 0,
+ !or(Features.IsPacked, Features.HasOpSel, P.HasModifiers));
+ let IsSingle = 1;
+}
+
// consistently gives instructions a _e64 suffix
multiclass VOP3Inst_Pseudo_Wrapper<string opName, VOPProfile P, list<dag> pattern = [], bit VOP3Only = 0> {
def _e64 : VOP3_Pseudo<opName, P, pattern, VOP3Only>;
@@ -1325,6 +1461,28 @@ multiclass VOP3PseudoScalarInst<string OpName, VOPProfile P,
i32:$omod))))]>;
}
+multiclass VOP3Inst_t16_with_profiles<string OpName, VOPProfile P, VOPProfile P_t16,
+ VOPProfile P_fake16,
+ SDPatternOperator node = null_frag,
+ SDPatternOperator node_t16 = node> {
+ let True16Predicate = NotHasTrue16BitInsts in {
+ defm NAME : VOP3Inst<OpName, P, node>;
+ }
+ let True16Predicate = UseRealTrue16Insts in {
+ defm _t16 : VOP3Inst<OpName # "_t16", P_t16, node_t16>;
+ }
+ let True16Predicate = UseFakeTrue16Insts in {
+ defm _fake16 : VOP3Inst<OpName # "_fake16", P_fake16, node>;
+ }
+}
+
+multiclass VOP3Inst_t16<string OpName, VOPProfile P,
+ SDPatternOperator node = null_frag,
+ SDPatternOperator node_t16 = node>
+ : VOP3Inst_t16_with_profiles<OpName, VOP3_Profile<P, VOP3_OPSEL>,
+ VOP3_Profile_True16<P, VOP3_OPSEL>, VOP3_Profile_Fake16<P, VOP3_OPSEL>,
+ node, node_t16>;
+
//===----------------------------------------------------------------------===//
// VOP3 DPP
//===----------------------------------------------------------------------===//
@@ -1342,17 +1500,43 @@ class Base_VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
let OtherPredicates = ps.OtherPredicates;
}
+class Base_VOP3_DPP16_t16<bits<10> op, VOP_DPP_Pseudo ps,
+ string opName = ps.OpName>
+ : VOP3_DPP_t16<op, opName, ps.Pfl, 1> {
+ let VOP3_OPSEL = ps.Pfl.HasOpSel;
+ let IsDOT = ps.IsDOT;
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+ let AssemblerPredicate = HasDPP16;
+ let SubtargetPredicate = HasDPP16;
+ let OtherPredicates = ps.OtherPredicates;
+}
+
class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget,
string opName = ps.OpName>
: Base_VOP3_DPP16<op, ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>;
+class VOP3_DPP16_t16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget,
+ string opName = ps.OpName>
+ : Base_VOP3_DPP16_t16<op, ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>;
+
class VOP3_DPP16_Gen<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen,
- string opName = ps.OpName> :
- VOP3_DPP16 <op, ps, Gen.Subtarget, opName> {
+ string opName = ps.OpName>
+ : VOP3_DPP16 <op, ps, Gen.Subtarget, opName> {
let AssemblerPredicate = Gen.AssemblerPredicate;
- let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
- let DecoderNamespace = Gen.DecoderNamespace#
- !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
+ let DecoderNamespace = Gen.DecoderNamespace;
+}
+
+class VOP3_DPP16_Gen_t16<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen,
+ string opName = ps.OpName>
+ : VOP3_DPP16_t16<op, ps, Gen.Subtarget, opName> {
+ let True16Predicate =
+ !if (ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let DecoderNamespace =
+ Gen.DecoderNamespace #!if (ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
class Base_VOP3_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
@@ -1366,6 +1550,20 @@ class Base_VOP3_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
let SubtargetPredicate = ps.SubtargetPredicate;
let OtherPredicates = ps.OtherPredicates;
+ let True16Predicate = ps.True16Predicate;
+}
+
+class Base_VOP3_DPP8_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
+ : VOP3_DPP8_t16<op, opName, ps.Pfl> {
+ let VOP3_OPSEL = ps.Pfl.HasOpSel;
+ let IsDOT = ps.IsDOT;
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+
+ let OtherPredicates = ps.OtherPredicates;
+ let True16Predicate = ps.True16Predicate;
}
class Base_VOP3b_DPP16<bits<10> op, VOP_DPP_Pseudo ps,
@@ -1381,6 +1579,12 @@ class VOP3b_DPP8_Base<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
let Inst{14 - 8} = sdst;
}
+class VOP3b_DPP8_Base_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
+ : Base_VOP3_DPP8<op, ps, opName> {
+ bits<8> sdst;
+ let Inst{14 - 8} = sdst{7-1};
+}
+
//===----------------------------------------------------------------------===//
// VOP3 GFX11, GFX12
//===----------------------------------------------------------------------===//
@@ -1420,10 +1624,11 @@ multiclass VOP3Dot_Real_Base<GFXGen Gen, bits<10> op, string opName = NAME,
}
multiclass VOP3_Real_with_name<GFXGen Gen, bits<10> op, string opName,
- string asmName, bit isSingle = 0> {
+ string asmName, string pseudo_mnemonic = "", bit isSingle = 0> {
defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
let AsmString = asmName # ps.AsmOperands,
IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
+ // FIXME-TRUE16 support FP8 instructions properly
if ps.Pfl.IsFP8SrcByteSel then {
def _e64#Gen.Suffix :
VOP3_Real_Gen<ps, Gen>,
@@ -1432,17 +1637,27 @@ multiclass VOP3_Real_with_name<GFXGen Gen, bits<10> op, string opName,
def _e64#Gen.Suffix :
VOP3_Real_Gen<ps, Gen>,
VOP3FP8OpSel_dst_bytesel_gfx11_gfx12<op, ps.Pfl>;
- } else if ps.Pfl.HasOpSel then {
- def _e64#Gen.Suffix :
- VOP3_Real_Gen<ps, Gen>,
- VOP3OpSel_gfx11_gfx12<op, ps.Pfl>;
} else {
- def _e64#Gen.Suffix :
- VOP3_Real_Gen<ps, Gen>,
- VOP3e_gfx11_gfx12<op, ps.Pfl>;
+ if ps.Pfl.IsRealTrue16 then {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3e_t16_gfx11_gfx12<op, ps.Pfl>;
+ } else {
+ if ps.Pfl.HasOpSel then {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3OpSel_gfx11_gfx12<op, ps.Pfl>;
+ } else {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3e_gfx11_gfx12<op, ps.Pfl>;
+ }
+ }
}
}
- def Gen.Suffix#"_VOP3_alias" : LetDummies, AMDGPUMnemonicAlias<ps.Mnemonic, asmName> {
+ def Gen.Suffix#"_VOP3_alias" : LetDummies,
+ AMDGPUMnemonicAlias<!if(!empty(pseudo_mnemonic),
+ ps.Mnemonic, pseudo_mnemonic), asmName, ""> {
let AssemblerPredicate = Gen.AssemblerPredicate;
}
}
@@ -1456,8 +1671,13 @@ multiclass VOP3_Real_No_Suffix<GFXGen Gen, bits<10> op, string opName = NAME> {
}
multiclass VOP3_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
- def _e64_dpp#Gen.Suffix :
- VOP3_DPP16_Gen<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), Gen>;
+ defvar ps = !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp");
+ if ps.Pfl.IsTrue16 then
+ def _e64_dpp#Gen.Suffix :
+ VOP3_DPP16_Gen_t16<op, ps, Gen>;
+ else
+ def _e64_dpp#Gen.Suffix :
+ VOP3_DPP16_Gen<op, ps, Gen>;
}
multiclass VOP3Dot_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
@@ -1552,18 +1772,14 @@ multiclass VOP3Only_Realtriple<GFXGen Gen, bits<10> op> :
VOP3_Realtriple<Gen, op, 1>;
multiclass VOP3_Realtriple_with_name<GFXGen Gen, bits<10> op, string opName,
- string asmName, bit isSingle = 0> :
- VOP3_Real_with_name<Gen, op, opName, asmName, isSingle>,
+ string asmName, string pseudo_mnemonic = "", bit isSingle = 0> :
+ VOP3_Real_with_name<Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
VOP3_Real_dpp_with_name<Gen, op, opName, asmName>,
VOP3_Real_dpp8_with_name<Gen, op, opName, asmName>;
multiclass VOP3Only_Realtriple_with_name<GFXGen Gen, bits<10> op, string opName,
string asmName> :
- VOP3_Realtriple_with_name<Gen, op, opName, asmName, 1>;
-
-multiclass VOP3Only_Realtriple_t16<GFXGen Gen, bits<10> op, string asmName,
- string opName = NAME>
- : VOP3Only_Realtriple_with_name<Gen, op, opName, asmName>;
+ VOP3_Realtriple_with_name<Gen, op, opName, asmName, "", 1>;
multiclass VOP3be_Realtriple<
GFXGen Gen, bits<10> op, bit isSingle = 0, string opName = NAME,
@@ -1579,6 +1795,16 @@ multiclass VOP3beOnly_Realtriple<GFXGen Gen, bits<10> op> :
// VOP3 GFX11
//===----------------------------------------------------------------------===//
+// VOP1 and VOP2 depend on these triple defs
+
+multiclass VOP3_Realtriple_t16_gfx11<bits<10> op, string asmName, string opName = NAME,
+ string pseudo_mnemonic = "", bit isSingle = 0> :
+ VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
+
+multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName,
+ string opName = NAME, string pseudo_mnemonic = "">
+ : VOP3_Realtriple_t16_gfx11<op, asmName, opName, pseudo_mnemonic, 1>;
+
multiclass VOP3be_Real_gfx11<bits<10> op, string opName, string asmName,
bit isSingle = 0> :
VOP3be_Real<GFX11Gen, op, opName, asmName, isSingle>;
@@ -1591,10 +1817,6 @@ multiclass VOP3_Realtriple_gfx11<bits<10> op, bit isSingle = 0,
string opName = NAME> :
VOP3_Realtriple<GFX11Gen, op, isSingle, opName>;
-multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName,
- string opName = NAME>
- : VOP3Only_Realtriple_with_name<GFX11Gen, op, opName, asmName>;
-
//===----------------------------------------------------------------------===//
// VOP3 GFX12
//===----------------------------------------------------------------------===//
@@ -1610,6 +1832,16 @@ multiclass VOP3Only_Real_Base_gfx12<bits<10> op> :
multiclass VOP3Only_Realtriple_t16_gfx12<bits<10> op> :
VOP3Only_Realtriple<GFX12Gen, op>;
+multiclass VOP3_Realtriple_t16_gfx12<bits<10> op, string asmName, string opName = NAME,
+ string pseudo_mnemonic = "", bit isSingle = 0> :
+ VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
+
+multiclass VOP3_Realtriple_t16_and_f16_gfx12<bits<10> op, string asmName, string opName = NAME,
+ string pseudo_mnemonic = "", bit isSingle = 0> {
+ defm opName#"_t16":VOP3_Realtriple_t16_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
+ defm opName#"_fake16":VOP3_Realtriple_t16_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
+}
+
multiclass VOP3be_Real_with_name_gfx12<bits<10> op, string opName,
string asmName, bit isSingle = 0> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
@@ -1624,18 +1856,14 @@ multiclass VOP3be_Real_with_name_gfx12<bits<10> op, string opName,
}
multiclass VOP3_Realtriple_with_name_gfx12<bits<10> op, string opName,
- string asmName, bit isSingle = 0> :
- VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, isSingle>;
+ string asmName, string pseudo_mnemonic = "", bit isSingle = 0> :
+ VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
multiclass VOP3Only_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
string asmName> :
VOP3Only_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
VOP3Only_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
-multiclass VOP3Only_Realtriple_with_name_t16_gfx12<bits<10> op, string asmName,
- string opName = NAME>
- : VOP3Only_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
-
//===----------------------------------------------------------------------===//
include "VOPCInstructions.td"
@@ -1705,4 +1933,4 @@ def VOPTrue16Table : GenericTable {
let PrimaryKey = ["Opcode"];
let PrimaryKeyName = "getTrue16OpcodeHelper";
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir
index 11411c691c3901..1971cd80d5686b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
# Note: 16-bit instructions generally produce a 0 result in the high 16-bits on GFX8 and GFX9 and preserve high 16 bits on GFX10+
@@ -23,6 +23,7 @@ body: |
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]]
+ ;
; GFX10-LABEL: name: add_s16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10-NEXT: {{ $}}
@@ -30,6 +31,14 @@ body: |
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX10-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ADD_NC_U16_e64_]]
+ ;
+ ; GFX11-LABEL: name: add_s16
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_ADD_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ADD_NC_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -56,6 +65,7 @@ body: |
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]]
+ ;
; GFX10-LABEL: name: add_s16_zext_to_s32
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10-NEXT: {{ $}}
@@ -65,6 +75,16 @@ body: |
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ADD_NC_U16_e64_]], implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
+ ;
+ ; GFX11-LABEL: name: add_s16_zext_to_s32
+ ; GFX11: liveins: $vgpr0, $vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_ADD_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ADD_NC_U16_fake16_e64_]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -91,12 +111,20 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec
; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]]
+ ;
; GFX10-LABEL: name: add_s16_neg_inline_const_64
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[V_SUB_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_SUB_NC_U16_e64_]]
+ ;
+ ; GFX11-LABEL: name: add_s16_neg_inline_const_64
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[V_SUB_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_fake16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_SUB_NC_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_CONSTANT i16 -64
@@ -121,6 +149,7 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec
; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]]
+ ;
; GFX10-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -129,6 +158,15 @@ body: |
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_SUB_NC_U16_e64_]], implicit $exec
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
+ ;
+ ; GFX11-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32
+ ; GFX11: liveins: $vgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[V_SUB_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_fake16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_SUB_NC_U16_fake16_e64_]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_CONSTANT i16 -64
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 1151bde02ef62c..41b61f2e09a3d3 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -82,9 +82,9 @@ body: |
# Regression test for src_modifiers on base u16 opcode
# GCN-LABEL: name: vop3_u16
-# GCN: %5:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec
-# GCN: %7:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 1, %5, 2, %5, 0, 0, 1, 15, 15, 1, implicit $exec
-# GCN: %9:vgpr_32 = V_ADD_NC_U16_e64 4, %8, 8, %7, 0, 0, implicit $exec
+# GCN: %5:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec
+# GCN: %7:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %3, 1, %5, 2, %5, 0, 0, 1, 15, 15, 1, implicit $exec
+# GCN: %9:vgpr_32 = V_ADD_NC_U16_fake16_e64 4, %8, 8, %7, 0, 0, implicit $exec
name: vop3_u16
tracksRegLiveness: true
body: |
@@ -96,11 +96,11 @@ body: |
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
%4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %5:vgpr_32 = V_ADD_NC_U16_e64 0, %4, 0, %3, 0, 0, implicit $exec
+ %5:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, %4, 0, %3, 0, 0, implicit $exec
%6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
- %7:vgpr_32 = V_ADD_NC_U16_e64 1, %6, 2, %5, 0, 0, implicit $exec
+ %7:vgpr_32 = V_ADD_NC_U16_fake16_e64 1, %6, 2, %5, 0, 0, implicit $exec
%8:vgpr_32 = V_MOV_B32_dpp %3, %7, 1, 15, 15, 1, implicit $exec
- %9:vgpr_32 = V_ADD_NC_U16_e64 4, %8, 8, %7, 0, 0, implicit $exec
+ %9:vgpr_32 = V_ADD_NC_U16_fake16_e64 4, %8, 8, %7, 0, 0, implicit $exec
...
name: vop3p
@@ -880,11 +880,11 @@ body: |
# Check op_sel is all 0s when combining
# GCN-LABEL: name: opsel_vop3
-# GCN: %4:vgpr_32 = V_ADD_I16_e64_dpp %2, 0, %0, 0, %1, 0, 0, 1, 15, 15, 1, implicit $exec
-# GCN: %6:vgpr_32 = V_ADD_I16_e64 4, %5, 0, %1, 0, 0, implicit $exec
-# GCN: %8:vgpr_32 = V_ADD_I16_e64 0, %7, 4, %1, 0, 0, implicit $exec
-# GCN: %10:vgpr_32 = V_ADD_I16_e64 4, %9, 4, %1, 0, 0, implicit $exec
-# GCN: %12:vgpr_32 = V_ADD_I16_e64 8, %11, 0, %1, 0, 0, implicit $exec
+# GCN: %4:vgpr_32 = V_ADD_I16_fake16_e64_dpp %2, 0, %0, 0, %1, 0, 0, 1, 15, 15, 1, implicit $exec
+# GCN: %6:vgpr_32 = V_ADD_I16_fake16_e64 4, %5, 0, %1, 0, 0, implicit $exec
+# GCN: %8:vgpr_32 = V_ADD_I16_fake16_e64 0, %7, 4, %1, 0, 0, implicit $exec
+# GCN: %10:vgpr_32 = V_ADD_I16_fake16_e64 4, %9, 4, %1, 0, 0, implicit $exec
+# GCN: %12:vgpr_32 = V_ADD_I16_fake16_e64 8, %11, 0, %1, 0, 0, implicit $exec
name: opsel_vop3
tracksRegLiveness: true
body: |
@@ -897,23 +897,23 @@ body: |
; Combine for op_sel:[0,0,0]
%3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
- %4:vgpr_32 = V_ADD_I16_e64 0, %3, 0, %1, 0, 0, implicit $exec
+ %4:vgpr_32 = V_ADD_I16_fake16_e64 0, %3, 0, %1, 0, 0, implicit $exec
; Do not combine for op_sel:[1,0,0]
%5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
- %6:vgpr_32 = V_ADD_I16_e64 4, %5, 0, %1, 0, 0, implicit $exec
+ %6:vgpr_32 = V_ADD_I16_fake16_e64 4, %5, 0, %1, 0, 0, implicit $exec
; Do not combine for op_sel:[0,1,0]
%7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
- %8:vgpr_32 = V_ADD_I16_e64 0, %7, 4, %1, 0, 0, implicit $exec
+ %8:vgpr_32 = V_ADD_I16_fake16_e64 0, %7, 4, %1, 0, 0, implicit $exec
; Do not combine for op_sel:[1,1,0]
%9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
- %10:vgpr_32 = V_ADD_I16_e64 4, %9, 4, %1, 0, 0, implicit $exec
+ %10:vgpr_32 = V_ADD_I16_fake16_e64 4, %9, 4, %1, 0, 0, implicit $exec
; Do not combine for op_sel:[0,0,1] (dst_op_sel only)
%11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
- %12:vgpr_32 = V_ADD_I16_e64 8, %11, 0, %1, 0, 0, implicit $exec
+ %12:vgpr_32 = V_ADD_I16_fake16_e64 8, %11, 0, %1, 0, 0, implicit $exec
...
# Check op_sel is all 0s and op_sel_hi is all 1s when combining
diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll
index c62b4e565078e4..2e2a1094ba99ae 100644
--- a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll
+++ b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll
@@ -996,7 +996,7 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_i16(i16 inreg
; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; GISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GISEL-GFX11-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY2]], 0, [[COPY1]], 0, 0, implicit $exec
+ ; GISEL-GFX11-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY2]], 0, [[COPY1]], 0, 0, implicit $exec
; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[DEF]]
; GISEL-GFX11-NEXT: FLAT_STORE_SHORT [[COPY3]], [[V_ADD_NC_U16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`)
; GISEL-GFX11-NEXT: S_ENDPGM 0
@@ -1020,7 +1020,7 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_i16(i16 inreg
; DAGISEL-GFX11-WF32-NEXT: {{ $}}
; DAGISEL-GFX11-WF32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; DAGISEL-GFX11-WF32-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; DAGISEL-GFX11-WF32-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
+ ; DAGISEL-GFX11-WF32-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
; DAGISEL-GFX11-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-WF32-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
; DAGISEL-GFX11-WF32-NEXT: FLAT_STORE_SHORT killed [[COPY2]], killed [[V_ADD_NC_U16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`)
@@ -1032,7 +1032,7 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_i16(i16 inreg
; DAGISEL-GFX11-WF64-NEXT: {{ $}}
; DAGISEL-GFX11-WF64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; DAGISEL-GFX11-WF64-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; DAGISEL-GFX11-WF64-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
+ ; DAGISEL-GFX11-WF64-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
; DAGISEL-GFX11-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-WF64-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
; DAGISEL-GFX11-WF64-NEXT: FLAT_STORE_SHORT killed [[COPY2]], killed [[V_ADD_NC_U16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`)
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_opsel.s b/llvm/test/MC/AMDGPU/gfx11_asm_opsel.s
new file mode 100644
index 00000000000000..b7b71e696cd102
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_opsel.s
@@ -0,0 +1,48 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+
+v_add_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
+// GFX11: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_add_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
+// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_add_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
+// GFX11: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX11: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX11: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
+// GFX11: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_sub_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
+// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
+// GFX11: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX11: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX11: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v5.h, null, exec_lo op_sel:[1,1,1]
+// GFX11: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_sub_nc_u16 v5.l, -1, exec_hi op_sel:[0,0,0]
+// GFX11: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0]
+// GFX11: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX11: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX11: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index 67038f4c8eec09..6b6d411982ebc4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -244,50 +244,59 @@ v_add_lshl_u32 v5, src_scc, vcc_lo, -1
v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null
// GFX11: encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
-v_add_nc_i16 v5, v1, v2
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
+v_add_nc_i16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
-v_add_nc_i16 v5, v255, v255
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
+v_add_nc_i16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
-v_add_nc_i16 v5, s1, s2
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+v_add_nc_i16 v5.l, s1, s2
+// GFX11: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
-v_add_nc_i16 v5, s105, s105
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+v_add_nc_i16 v5.l, s105, s105
+// GFX11: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
-v_add_nc_i16 v5, vcc_lo, ttmp15
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+v_add_nc_i16 v5.l, vcc_lo, ttmp15
+// GFX11: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
-v_add_nc_i16 v5, vcc_hi, 0xfe0b
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+v_add_nc_i16 v5.l, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_add_nc_i16 v5, ttmp15, src_scc
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+v_add_nc_i16 v5.l, ttmp15, src_scc
+// GFX11: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
-v_add_nc_i16 v5, m0, 0.5
+v_add_nc_i16 v5.l, m0, 0.5
// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00]
-v_add_nc_i16 v5, exec_lo, -1
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+v_add_nc_i16 v5.l, exec_lo, -1
+// GFX11: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
-v_add_nc_i16 v5, exec_hi, null
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+v_add_nc_i16 v5.l, exec_hi, null
+// GFX11: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
-v_add_nc_i16 v5, null, exec_lo op_sel:[1,1,1]
-// GFX11: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+v_add_nc_i16 v5.l, null, exec_lo
+// GFX11: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
-v_add_nc_i16 v5, -1, exec_hi op_sel:[0,0,0]
-// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+v_add_nc_i16 v5.l, -1, exec_hi
+// GFX11: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
-v_add_nc_i16 v5, 0.5, m0 op_sel:[1,0,0]
+v_add_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX11: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00]
-v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0]
-// GFX11: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+v_add_nc_i16 v5.l, src_scc, vcc_lo
+// GFX11: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
-v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
-// GFX11: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+v_add_nc_i16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_add_nc_i16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+
+v_add_nc_i16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+
+v_add_nc_i16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_add_nc_i32 v5, v1, v2
// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00]
@@ -334,50 +343,59 @@ v_add_nc_i32 v5, src_scc, vcc_lo
v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp
// GFX11: encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_add_nc_u16 v5, v1, v2
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
+v_add_nc_u16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
-v_add_nc_u16 v5, v255, v255
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
+v_add_nc_u16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
-v_add_nc_u16 v5, s1, s2
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+v_add_nc_u16 v5.l, s1, s2
+// GFX11: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
-v_add_nc_u16 v5, s105, s105
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+v_add_nc_u16 v5.l, s105, s105
+// GFX11: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
-v_add_nc_u16 v5, vcc_lo, ttmp15
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+v_add_nc_u16 v5.l, vcc_lo, ttmp15
+// GFX11: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
-v_add_nc_u16 v5, vcc_hi, 0xfe0b
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+v_add_nc_u16 v5.l, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_add_nc_u16 v5, ttmp15, src_scc
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+v_add_nc_u16 v5.l, ttmp15, src_scc
+// GFX11: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
-v_add_nc_u16 v5, m0, 0.5
+v_add_nc_u16 v5.l, m0, 0.5
// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00]
-v_add_nc_u16 v5, exec_lo, -1
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+v_add_nc_u16 v5.l, exec_lo, -1
+// GFX11: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
-v_add_nc_u16 v5, exec_hi, null
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+v_add_nc_u16 v5.l, exec_hi, null
+// GFX11: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
-v_add_nc_u16 v5, null, exec_lo op_sel:[1,1,1]
-// GFX11: encoding: [0x05,0x58,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+v_add_nc_u16 v5.l, null, exec_lo
+// GFX11: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
-v_add_nc_u16 v5, -1, exec_hi op_sel:[0,0,0]
-// GFX11: encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+v_add_nc_u16 v5.l, -1, exec_hi
+// GFX11: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
-v_add_nc_u16 v5, 0.5, m0 op_sel:[1,0,0]
+v_add_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX11: encoding: [0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00]
-v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0]
-// GFX11: encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+v_add_nc_u16 v5.l, src_scc, vcc_lo
+// GFX11: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_add_nc_u16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_add_nc_u16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+
+v_add_nc_u16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
-v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
-// GFX11: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+v_add_nc_u16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_alignbit_b32 v5, v1, v2, s3
// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00]
@@ -5801,50 +5819,59 @@ v_sub_co_u32 v5, ttmp[14:15], src_scc, vcc_lo
v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp
// GFX11: encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_sub_nc_i16 v5, v1, v2
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
+v_sub_nc_i16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
-v_sub_nc_i16 v5, v255, v255
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
+v_sub_nc_i16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
-v_sub_nc_i16 v5, s1, s2
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+v_sub_nc_i16 v5.l, s1, s2
+// GFX11: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
-v_sub_nc_i16 v5, s105, s105
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+v_sub_nc_i16 v5.l, s105, s105
+// GFX11: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
-v_sub_nc_i16 v5, vcc_lo, ttmp15
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+v_sub_nc_i16 v5.l, vcc_lo, ttmp15
+// GFX11: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
-v_sub_nc_i16 v5, vcc_hi, 0xfe0b
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_sub_nc_i16 v5, ttmp15, src_scc
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+v_sub_nc_i16 v5.l, ttmp15, src_scc
+// GFX11: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
-v_sub_nc_i16 v5, m0, 0.5
+v_sub_nc_i16 v5.l, m0, 0.5
// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00]
-v_sub_nc_i16 v5, exec_lo, -1
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+v_sub_nc_i16 v5.l, exec_lo, -1
+// GFX11: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
-v_sub_nc_i16 v5, exec_hi, null
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+v_sub_nc_i16 v5.l, exec_hi, null
+// GFX11: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
-v_sub_nc_i16 v5, null, exec_lo op_sel:[1,1,1]
-// GFX11: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+v_sub_nc_i16 v5.l, null, exec_lo
+// GFX11: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
-v_sub_nc_i16 v5, -1, exec_hi op_sel:[0,0,0]
-// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+v_sub_nc_i16 v5.l, -1, exec_hi
+// GFX11: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
-v_sub_nc_i16 v5, 0.5, m0 op_sel:[1,0,0]
+v_sub_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX11: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00]
-v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0]
-// GFX11: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+v_sub_nc_i16 v5.l, src_scc, vcc_lo
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
-v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
-// GFX11: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+
+v_sub_nc_i16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+
+v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_sub_nc_i32 v5, v1, v2
// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00]
@@ -5891,50 +5918,59 @@ v_sub_nc_i32 v5, src_scc, vcc_lo
v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp
// GFX11: encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_sub_nc_u16 v5, v1, v2
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
+v_sub_nc_u16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
-v_sub_nc_u16 v5, v255, v255
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
+v_sub_nc_u16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
-v_sub_nc_u16 v5, s1, s2
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+v_sub_nc_u16 v5.l, s1, s2
+// GFX11: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
-v_sub_nc_u16 v5, s105, s105
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+v_sub_nc_u16 v5.l, s105, s105
+// GFX11: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
-v_sub_nc_u16 v5, vcc_lo, ttmp15
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+v_sub_nc_u16 v5.l, vcc_lo, ttmp15
+// GFX11: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
-v_sub_nc_u16 v5, vcc_hi, 0xfe0b
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_sub_nc_u16 v5, ttmp15, src_scc
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+v_sub_nc_u16 v5.l, ttmp15, src_scc
+// GFX11: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
-v_sub_nc_u16 v5, m0, 0.5
+v_sub_nc_u16 v5.l, m0, 0.5
// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00]
-v_sub_nc_u16 v5, exec_lo, -1
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+v_sub_nc_u16 v5.l, exec_lo, -1
+// GFX11: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
-v_sub_nc_u16 v5, exec_hi, null
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+v_sub_nc_u16 v5.l, exec_hi, null
+// GFX11: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
-v_sub_nc_u16 v5, null, exec_lo op_sel:[1,1,1]
-// GFX11: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+v_sub_nc_u16 v5.l, null, exec_lo
+// GFX11: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
-v_sub_nc_u16 v5, -1, exec_hi op_sel:[0,0,0]
-// GFX11: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+v_sub_nc_u16 v5.l, -1, exec_hi
+// GFX11: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
-v_sub_nc_u16 v5, 0.5, m0 op_sel:[1,0,0]
+v_sub_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX11: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00]
-v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0]
-// GFX11: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+v_sub_nc_u16 v5.l, src_scc, vcc_lo
+// GFX11: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v5.l, v1.h, v2.l
+// GFX11: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+
+v_sub_nc_u16 v5.l, v255.l, v255.h
+// GFX11: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
-v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
-// GFX11: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi clamp
+// GFX11: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_subrev_co_u32 v5, s6, v1, v2
// W32: encoding: [0x05,0x06,0x02,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index 3c693c556194e1..a25e8a6ccae60a 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -194,47 +194,59 @@ v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo
v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
-v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_add_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -278,47 +290,59 @@ v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -4116,47 +4140,59 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:
v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_sub_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -4200,47 +4236,59 @@ v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_sub_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -4475,30 +4523,6 @@ v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX11: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX11: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX11: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX11: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
// GFX11: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
@@ -4724,30 +4748,6 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 ban
v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
// GFX11: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX11: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX11: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX11: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX11: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
index 79709278bc0c7b..f9803f4400702d 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
@@ -114,14 +114,35 @@ v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0x47,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
-v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: [0x05,0x00,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: [0xff,0x80,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x26,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -132,14 +153,35 @@ v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_add_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x80,0x26,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: [0x05,0x00,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -2601,14 +2643,35 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0xfc,0x01,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: [0x05,0x00,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: [0xff,0x80,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2619,14 +2682,35 @@ v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_sub_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x80,0x25,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: [0x05,0x00,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x10,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xc0,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2748,30 +2832,6 @@ v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX11: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX11: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
@@ -2997,30 +3057,6 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
// GFX11: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX11: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX11: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_opsel.s b/llvm/test/MC/AMDGPU/gfx12_asm_opsel.s
new file mode 100644
index 00000000000000..0c270aa5c8b1e2
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_opsel.s
@@ -0,0 +1,48 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+
+v_add_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
+// GFX12: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_add_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
+// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_add_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
+// GFX12: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX12: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
+// GFX12: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_sub_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
+// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
+// GFX12: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX12: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v5.h, null, exec_lo op_sel:[1,1,1]
+// GFX12: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_sub_nc_u16 v5.l, -1, exec_hi op_sel:[0,0,0]
+// GFX12: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0]
+// GFX12: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX12: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
index f28933ec3a8945..1ae1eaf1ceeadd 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
@@ -208,49 +208,58 @@ v_add_lshl_u32 v5, src_scc, vcc_lo, -1
v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null
// GFX12: encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
-v_add_nc_i16 v5, v1, v2
+v_add_nc_i16 v5.l, v1.l, v2.l
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
-v_add_nc_i16 v5, v255, v255
+v_add_nc_i16 v5.l, v1.h, v2.l
+// GFX12: encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+
+v_add_nc_i16 v5.l, v255.l, v255.l
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
-v_add_nc_i16 v5, s1, s2
+v_add_nc_i16 v5.l, v255.l, v255.h
+// GFX12: encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+
+v_add_nc_i16 v5.l, s1, s2
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
-v_add_nc_i16 v5, s105, s105
+v_add_nc_i16 v5.l, s105, s105
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
-v_add_nc_i16 v5, vcc_lo, ttmp15
+v_add_nc_i16 v5.l, vcc_lo, ttmp15
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
-v_add_nc_i16 v5, vcc_hi, 0xfe0b
+v_add_nc_i16 v5.l, vcc_hi, 0xfe0b
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_add_nc_i16 v5, ttmp15, src_scc
+v_add_nc_i16 v5.l, ttmp15, src_scc
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
-v_add_nc_i16 v5, m0, 0.5
+v_add_nc_i16 v5.l, m0, 0.5
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00]
-v_add_nc_i16 v5, exec_lo, -1
+v_add_nc_i16 v5.l, exec_lo, -1
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
-v_add_nc_i16 v5, exec_hi, null
+v_add_nc_i16 v5.l, exec_hi, null
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
-v_add_nc_i16 v5, null, exec_lo op_sel:[1,1,1]
+v_add_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
// GFX12: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
-v_add_nc_i16 v5, -1, exec_hi op_sel:[0,0,0]
+v_add_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
-v_add_nc_i16 v5, 0.5, m0 op_sel:[1,0,0]
+v_add_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX12: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00]
-v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0]
+v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
// GFX12: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
-v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_add_nc_i16 v255.h, 0xfe0b, vcc_hi clamp
// GFX12: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_add_nc_i32 v5, v1, v2
@@ -298,49 +307,58 @@ v_add_nc_i32 v5, src_scc, vcc_lo
v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp
// GFX12: encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_add_nc_u16 v5, v1, v2
+v_add_nc_u16 v5.l, v1.l, v2.l
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
-v_add_nc_u16 v5, v255, v255
+v_add_nc_u16 v5.l, v1.h, v2.l
+// GFX12: encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+
+v_add_nc_u16 v5.l, v255.l, v255.l
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
-v_add_nc_u16 v5, s1, s2
+v_add_nc_u16 v5.l, v255.l, v255.h
+// GFX12: encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+
+v_add_nc_u16 v5.l, s1, s2
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
-v_add_nc_u16 v5, s105, s105
+v_add_nc_u16 v5.l, s105, s105
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
-v_add_nc_u16 v5, vcc_lo, ttmp15
+v_add_nc_u16 v5.l, vcc_lo, ttmp15
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
-v_add_nc_u16 v5, vcc_hi, 0xfe0b
+v_add_nc_u16 v5.l, vcc_hi, 0xfe0b
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_add_nc_u16 v5, ttmp15, src_scc
+v_add_nc_u16 v5.l, ttmp15, src_scc
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
-v_add_nc_u16 v5, m0, 0.5
+v_add_nc_u16 v5.l, m0, 0.5
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00]
-v_add_nc_u16 v5, exec_lo, -1
+v_add_nc_u16 v5.l, exec_lo, -1
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
-v_add_nc_u16 v5, exec_hi, null
+v_add_nc_u16 v5.l, exec_hi, null
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
-v_add_nc_u16 v5, null, exec_lo op_sel:[1,1,1]
+v_add_nc_u16 v5.h, null, exec_lo op_sel:[1,1,1]
// GFX12: encoding: [0x05,0x58,0x03,0xd7,0x7c,0xfc,0x00,0x00]
-v_add_nc_u16 v5, -1, exec_hi op_sel:[0,0,0]
+v_add_nc_u16 v5.l, -1, exec_hi op_sel:[0,0,0]
// GFX12: encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
-v_add_nc_u16 v5, 0.5, m0 op_sel:[1,0,0]
+v_add_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX12: encoding: [0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00]
-v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0]
+v_add_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
// GFX12: encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
-v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_add_nc_u16 v255.h, 0xfe0b, vcc_hi clamp
// GFX12: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_alignbit_b32 v5, v1, v2, s3
@@ -5696,49 +5714,58 @@ v_sub_co_u32 v5, ttmp[14:15], src_scc, vcc_lo
v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp
// GFX12: encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_sub_nc_i16 v5, v1, v2
+v_sub_nc_i16 v5.l, v1.l, v2.l
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
-v_sub_nc_i16 v5, v255, v255
+v_sub_nc_i16 v5.l, v1.h, v2.l
+// GFX12: encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+
+v_sub_nc_i16 v5.l, v255.l, v255.l
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
-v_sub_nc_i16 v5, s1, s2
+v_sub_nc_i16 v5.l, v255.l, v255.h
+// GFX12: encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+
+v_sub_nc_i16 v5.l, s1, s2
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
-v_sub_nc_i16 v5, s105, s105
+v_sub_nc_i16 v5.l, s105, s105
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
-v_sub_nc_i16 v5, vcc_lo, ttmp15
+v_sub_nc_i16 v5.l, vcc_lo, ttmp15
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
-v_sub_nc_i16 v5, vcc_hi, 0xfe0b
+v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_sub_nc_i16 v5, ttmp15, src_scc
+v_sub_nc_i16 v5.l, ttmp15, src_scc
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
-v_sub_nc_i16 v5, m0, 0.5
+v_sub_nc_i16 v5.l, m0, 0.5
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00]
-v_sub_nc_i16 v5, exec_lo, -1
+v_sub_nc_i16 v5.l, exec_lo, -1
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
-v_sub_nc_i16 v5, exec_hi, null
+v_sub_nc_i16 v5.l, exec_hi, null
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
-v_sub_nc_i16 v5, null, exec_lo op_sel:[1,1,1]
+v_sub_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
// GFX12: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
-v_sub_nc_i16 v5, -1, exec_hi op_sel:[0,0,0]
+v_sub_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
-v_sub_nc_i16 v5, 0.5, m0 op_sel:[1,0,0]
+v_sub_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX12: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00]
-v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0]
+v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
// GFX12: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
-v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi clamp
// GFX12: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_sub_nc_i32 v5, v1, v2
@@ -5786,49 +5813,58 @@ v_sub_nc_i32 v5, src_scc, vcc_lo
v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp
// GFX12: encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_sub_nc_u16 v5, v1, v2
+v_sub_nc_u16 v5.l, v1.l, v2.l
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
-v_sub_nc_u16 v5, v255, v255
+v_sub_nc_u16 v5.l, v1.h, v2.l
+// GFX12: encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+
+v_sub_nc_u16 v5.l, v255.l, v255.l
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
-v_sub_nc_u16 v5, s1, s2
+v_sub_nc_u16 v5.l, v255.l, v255.h
+// GFX12: encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+
+v_sub_nc_u16 v5.l, s1, s2
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
-v_sub_nc_u16 v5, s105, s105
+v_sub_nc_u16 v5.l, s105, s105
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
-v_sub_nc_u16 v5, vcc_lo, ttmp15
+v_sub_nc_u16 v5.l, vcc_lo, ttmp15
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
-v_sub_nc_u16 v5, vcc_hi, 0xfe0b
+v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_sub_nc_u16 v5, ttmp15, src_scc
+v_sub_nc_u16 v5.l, ttmp15, src_scc
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
-v_sub_nc_u16 v5, m0, 0.5
+v_sub_nc_u16 v5.l, m0, 0.5
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00]
-v_sub_nc_u16 v5, exec_lo, -1
+v_sub_nc_u16 v5.l, exec_lo, -1
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
-v_sub_nc_u16 v5, exec_hi, null
+v_sub_nc_u16 v5.l, exec_hi, null
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
-v_sub_nc_u16 v5, null, exec_lo op_sel:[1,1,1]
+v_sub_nc_u16 v5.h, null, exec_lo op_sel:[1,1,1]
// GFX12: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00]
-v_sub_nc_u16 v5, -1, exec_hi op_sel:[0,0,0]
+v_sub_nc_u16 v5.l, -1, exec_hi op_sel:[0,0,0]
// GFX12: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
-v_sub_nc_u16 v5, 0.5, m0 op_sel:[1,0,0]
+v_sub_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0]
// GFX12: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00]
-v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0]
+v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
// GFX12: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
-v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi clamp
// GFX12: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
v_subrev_co_u32 v5, s6, v1, v2
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
index adf37901fc85fd..56bd0ee4b47465 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
@@ -214,47 +214,71 @@ v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo
v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
-v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15
-// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_add_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_add_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -298,47 +322,71 @@ v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15
-// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_add_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -4622,47 +4670,71 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:
v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15
-// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_sub_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_sub_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -4706,47 +4778,71 @@ v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15
-// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1
// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15
// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_sub_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_sub_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -5001,30 +5097,6 @@ v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX12: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX12: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX12: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX12: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX12: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX12: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX12: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX12: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
// GFX12: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
@@ -5250,30 +5322,6 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 ban
v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
// GFX12: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX12: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX12: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX12: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX12: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX12: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX12: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX12: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
-
-v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX12: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
-
v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
// GFX12: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
index 1be122faccbc92..6331d22c6976d9 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
@@ -134,14 +134,38 @@ v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x00,0x47,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
-v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX12: [0x05,0x00,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: [0xff,0x80,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x10,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0xc0,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x26,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -152,14 +176,38 @@ v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_add_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x80,0x26,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX12: [0x05,0x00,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x10,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0xc0,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -3043,14 +3091,38 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0xfc,0x01,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX12: [0x05,0x00,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: [0xff,0x80,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x10,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0xc0,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -3061,14 +3133,38 @@ v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_sub_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x80,0x25,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX12: [0x05,0x00,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_sub_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x10,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0xc0,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -3210,30 +3306,6 @@ v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x00,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX12: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX12: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
@@ -3459,30 +3531,6 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
// GFX12: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX12: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-
-v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX12: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-
v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
// GFX12: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
index 07058a64515920..365caa5f9b6d09 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
@@ -189,49 +189,112 @@
# GFX11: v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf
-# GFX11: v_add_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
-0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00
-
-# GFX11: v_add_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
-0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00
-
-# GFX11: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_add_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00
-# GFX11: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00
-# GFX11: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00
-# GFX11: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00
-# GFX11: v_add_nc_i16 v5, m0, 0x3800
+# W32-REAL16: v_add_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00
-# GFX11: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00
-# GFX11: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00
-# GFX11: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00
-# GFX11: v_add_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
-0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00
-
-# GFX11: v_add_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0]
-0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00
-
-# GFX11: v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
-0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00
+# W32-REAL16: v_add_nc_i16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00
+
+# W32-REAL16: v_add_nc_i16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+0x05,0x00,0x0d,0xd7,0xf0,0xfa,0x00,0x00
+
+# W32-REAL16: v_add_nc_i16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00
+
+# W32-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX11: v_add_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00]
@@ -279,49 +342,112 @@
# GFX11: v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX11: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
-0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00
-
-# GFX11: v_add_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
-0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00
-
-# GFX11: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_add_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00
-# GFX11: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00
-# GFX11: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00
-# GFX11: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00
-# GFX11: v_add_nc_u16 v5, m0, 0x3800
+# W32-REAL16: v_add_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00
-# GFX11: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00
-# GFX11: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00
-# GFX11: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00
-# GFX11: v_add_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00]
-0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00
-
-# GFX11: v_add_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0]
-0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00
-
-# GFX11: v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
-0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00
+# W32-REAL16: v_add_nc_u16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+0x05,0x00,0x03,0xd7,0xf0,0xfa,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX11: v_alignbit_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00]
@@ -5871,49 +5997,112 @@
# GFX11: v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX11: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
-0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00
-
-# GFX11: v_sub_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
-0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00
-
-# GFX11: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_sub_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00
-# GFX11: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00
-# GFX11: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00
-# GFX11: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00
-# GFX11: v_sub_nc_i16 v5, m0, 0x3800
+# W32-REAL16: v_sub_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00
-# GFX11: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00
-# GFX11: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00
-# GFX11: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00
-# GFX11: v_sub_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
-0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00
-
-# GFX11: v_sub_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0]
-0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00
-
-# GFX11: v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
-0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00
+# W32-REAL16: v_sub_nc_i16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00
+
+# W32-REAL16: v_sub_nc_i16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+0x05,0x00,0x0e,0xd7,0xf0,0xfa,0x00,0x00
+
+# W32-REAL16: v_sub_nc_i16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00
+
+# W32-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX11: v_sub_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00]
@@ -5961,49 +6150,112 @@
# GFX11: v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX11: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
-0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00
-
-# GFX11: v_sub_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
-0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00
-
-# GFX11: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_sub_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00
-# GFX11: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00
-# GFX11: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00
-# GFX11: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00
-# GFX11: v_sub_nc_u16 v5, m0, 0x3800
+# W32-REAL16: v_sub_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00
-# GFX11: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00
-# GFX11: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00
-# GFX11: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00
-# GFX11: v_sub_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00]
-0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00
-
-# GFX11: v_sub_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0]
-0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00
-
-# GFX11: v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
-0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00
+# W32-REAL16: v_sub_nc_u16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+0x05,0x00,0x04,0xd7,0xf0,0xfa,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00
+
+# W32-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00
+
+# W32-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# W32: v_subrev_co_u32 v5, s12, v1, v2 ; encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
index 4ae8b053f0e0f3..d0bd6398ad10a0 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
@@ -3824,88 +3824,220 @@
# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -4664,88 +4796,220 @@
# GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
index b44dba748666c7..cbf5a3d11e50b8 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
@@ -2168,34 +2168,112 @@
# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -2840,34 +2918,112 @@
# GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
index af04a31423b6fe..d9add27704d9d2 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
@@ -153,49 +153,115 @@
# GFX12: v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf
-# GFX12: v_add_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00
-# GFX12: v_add_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00
-# GFX12: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00
-# GFX12: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00
-# GFX12: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00
-# GFX12: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX12: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00
-# GFX12: v_add_nc_i16 v5, m0, 0x3800
+# W32-REAL16: v_add_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00
-# GFX12: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00
-# GFX12: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00
-# GFX12: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00
-# GFX12: v_add_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00
-# GFX12: v_add_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0]
+# W32-REAL16: v_add_nc_i16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00
-# GFX12: v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00
-# GFX12: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
+
+# GFX11: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00
+
+# GFX11: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00
+
+# GFX11: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX12: v_add_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00]
@@ -243,49 +309,115 @@
# GFX12: v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX12: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00
-# GFX12: v_add_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00
-# GFX12: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00
-# GFX12: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00
-# GFX12: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00
-# GFX12: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX12: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00
-# GFX12: v_add_nc_u16 v5, m0, 0x3800
+# W32-REAL16: v_add_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00
-# GFX12: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00
-# GFX12: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00
-# GFX12: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00
-# GFX12: v_add_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00]
0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00
-# GFX12: v_add_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0]
+# W32-REAL16: v_add_nc_u16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00
-# GFX12: v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00
-# GFX12: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
+
+# GFX11: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00
+
+# GFX11: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00
+
+# GFX11: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX12: v_alignbit_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00]
@@ -5797,49 +5929,115 @@
# GFX12: v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX12: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00
-# GFX12: v_sub_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00
-# GFX12: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00
-# GFX12: v_sub_nc_i16 v5, m0, 0x3800
+# W32-REAL16: v_sub_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00
-# GFX12: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00
-# GFX12: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0]
+# W32-REAL16: v_sub_nc_i16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00
-# GFX12: v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00
-# GFX12: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
+
+# GFX11: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00
+
+# GFX11: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00
+
+# GFX11: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX12: v_sub_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00]
@@ -5887,49 +6085,115 @@
# GFX12: v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX12: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00
-# GFX12: v_sub_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00
-# GFX12: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00
-# GFX12: v_sub_nc_u16 v5, m0, 0x3800
+# W32-REAL16: v_sub_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00
-# GFX12: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00
-# GFX12: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00]
0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0]
+# W32-REAL16: v_sub_nc_u16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00
-# GFX12: v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00
-# GFX12: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
+
+# GFX11: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
+0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00
+
+# GFX11: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
+0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00
+
+# GFX11: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# W32: v_subrev_co_u32 v5, s12, v1, v2 ; encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
index 65cfdd5ef7de03..5815a6be79954d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
@@ -4115,88 +4115,276 @@
# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX12: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# GFX11: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# GFX11: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# GFX11: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# GFX11: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# GFX11: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# GFX11: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# GFX11: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX12: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# GFX11: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -5000,88 +5188,276 @@
# GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX12: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# GFX11: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# GFX11: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# GFX11: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# GFX11: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# GFX11: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# GFX11: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# GFX11: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX12: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# GFX11: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
index 4640b967cbc07b..8636891fca9483 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
@@ -2393,34 +2393,168 @@
# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# GFX11: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# GFX11: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# GFX11: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# GFX11: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# GFX11: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -3113,34 +3247,168 @@
# GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# GFX11: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# GFX11: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# GFX11: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# GFX11: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# GFX11: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
>From 4e9bb716fa40c565e96fecdbdeed6f2e0b831e4d Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 7 Oct 2024 15:24:44 -0400
Subject: [PATCH 2/3] address reviewers' comments
---
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 4 +-
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 16 +++-----
llvm/lib/Target/AMDGPU/VOPInstructions.td | 10 ++---
llvm/test/MC/AMDGPU/gfx12_asm_opsel.s | 48 ----------------------
5 files changed, 14 insertions(+), 66 deletions(-)
delete mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_opsel.s
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index be42d3595bea61..573147db57b208 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2149,6 +2149,8 @@ class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
}
+// FIXME-TRUE16: AsmVOP3OpSel will be deprecated once all
+// VOP3 16-bit instructions have been converted to the true16 format.
class getAsmVOP3OpSel <int NumSrcArgs,
bit HasClamp,
bit HasOMod,
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index f411ea277bbe23..bc93f723490c7c 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1661,8 +1661,8 @@ multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;
multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> :
- VOP3_Realtriple_t16_gfx11<op, asmName, OpName, "", 1>,
- VOP3_Realtriple_t16_gfx12<op, asmName, OpName, "", 1>;
+ VOP3_Realtriple_t16_gfx11<op, asmName, OpName, "", /*IsSingle*/1>,
+ VOP3_Realtriple_t16_gfx12<op, asmName, OpName, "", /*IsSingle*/1>;
multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> {
defm OpName#"_t16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index fe37b56bcbf862..ddf7cf2a228c49 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -756,7 +756,7 @@ class OpSelBinOpClampPat<SDPatternOperator node,
(inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE, 0)
>;
-let OtherPredicates = [isGFX9Plus], True16Predicate = NotHasTrue16BitInsts in {
+let SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts in {
def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
} // End OtherPredicates = [isGFX9Plus], True16Predicate = NotHasTrue16BitInsts
@@ -877,7 +877,7 @@ let SubtargetPredicate = isGFX10Plus in {
} // End SubtargetPredicate = isGFX10Plus
-let True16Predicate = NotHasTrue16BitInsts, OtherPredicates = [isGFX10Plus] in {
+let True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus in {
def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
// Undo sub x, c -> add x, -c canonicalization since c is more likely
@@ -1112,12 +1112,6 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
-multiclass VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string opName,
- string asmName> {
- defm opName#"_t16": VOP3_Realtriple_with_name_gfx11_gfx12<op, opName#"_t16", asmName>;
- defm opName#"_fake16": VOP3_Realtriple_with_name_gfx11_gfx12<op, opName#"_fake16", asmName>;
-}
-
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
@@ -1126,7 +1120,7 @@ multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string o
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
-multiclass VOP3_Realtriple_t16_and_f16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
+multiclass VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
string pseudo_mnemonic = "", bit isSingle = 0> {
defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
@@ -1226,8 +1220,8 @@ defm V_MAX_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30
defm V_MAX_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30a, "v_max_i16">;
defm V_MIN_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30b, "v_min_u16">;
defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">;
-defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "V_ADD_I16", "v_add_nc_i16">;
-defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
+defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">;
+defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">;
defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 0c21ef29cc3c74..b2d4f8c16dee55 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1487,7 +1487,7 @@ multiclass VOP3Inst_t16<string OpName, VOPProfile P,
// VOP3 DPP
//===----------------------------------------------------------------------===//
-class Base_VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
+class VOP3_DPP16_Helper<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
: VOP3_DPP<op, opName, ps.Pfl, 1> {
let VOP3_OPSEL = ps.Pfl.HasOpSel;
let IsDOT = ps.IsDOT;
@@ -1500,7 +1500,7 @@ class Base_VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
let OtherPredicates = ps.OtherPredicates;
}
-class Base_VOP3_DPP16_t16<bits<10> op, VOP_DPP_Pseudo ps,
+class VOP3_DPP16_t16_Helper<bits<10> op, VOP_DPP_Pseudo ps,
string opName = ps.OpName>
: VOP3_DPP_t16<op, opName, ps.Pfl, 1> {
let VOP3_OPSEL = ps.Pfl.HasOpSel;
@@ -1516,11 +1516,11 @@ class Base_VOP3_DPP16_t16<bits<10> op, VOP_DPP_Pseudo ps,
class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget,
string opName = ps.OpName>
- : Base_VOP3_DPP16<op, ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>;
+ : VOP3_DPP16_Helper<op, ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>;
class VOP3_DPP16_t16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget,
string opName = ps.OpName>
- : Base_VOP3_DPP16_t16<op, ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>;
+ : VOP3_DPP16_t16_Helper<op, ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>;
class VOP3_DPP16_Gen<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen,
string opName = ps.OpName>
@@ -1568,7 +1568,7 @@ class Base_VOP3_DPP8_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
class Base_VOP3b_DPP16<bits<10> op, VOP_DPP_Pseudo ps,
string opName = ps.OpName>
- : Base_VOP3_DPP16<op, ps, opName> {
+ : VOP3_DPP16_Helper<op, ps, opName> {
bits<7> sdst;
let Inst{14 - 8} = sdst;
}
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_opsel.s b/llvm/test/MC/AMDGPU/gfx12_asm_opsel.s
deleted file mode 100644
index 0c270aa5c8b1e2..00000000000000
--- a/llvm/test/MC/AMDGPU/gfx12_asm_opsel.s
+++ /dev/null
@@ -1,48 +0,0 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
-
-v_add_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
-// GFX12: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00]
-
-v_add_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
-// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00]
-
-v_add_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
-// GFX12: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00]
-
-v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
-// GFX12: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00]
-
-v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
-// GFX12: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
-
-v_sub_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1]
-// GFX12: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
-
-v_sub_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0]
-// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
-
-v_sub_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0]
-// GFX12: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00]
-
-v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
-// GFX12: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
-
-v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
-// GFX12: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
-
-v_sub_nc_u16 v5.h, null, exec_lo op_sel:[1,1,1]
-// GFX12: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00]
-
-v_sub_nc_u16 v5.l, -1, exec_hi op_sel:[0,0,0]
-// GFX12: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
-
-v_sub_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0]
-// GFX12: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00]
-
-v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0]
-// GFX12: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
-
-v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
-// GFX12: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
-
>From 685062d2a5f55f457e373c2be86e06b277cfbd11 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 7 Oct 2024 18:06:09 -0400
Subject: [PATCH 3/3] remove stray gfx11 tags
---
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 4 ++--
.../MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 12 ------------
.../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 16 ----------------
.../Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt | 16 ----------------
4 files changed, 2 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index ddf7cf2a228c49..e90ed61b324a81 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -759,7 +759,7 @@ class OpSelBinOpClampPat<SDPatternOperator node,
let SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts in {
def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
-} // End OtherPredicates = [isGFX9Plus], True16Predicate = NotHasTrue16BitInsts
+} // End SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts
let True16Predicate = UseFakeTrue16Insts in {
def : OpSelBinOpClampPat<saddsat, V_ADD_I16_fake16_e64>;
def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_fake16_e64>;
@@ -886,7 +886,7 @@ let True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus in
(add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
(V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
>;
-} // End True16Predicate = NotHasTrue16BitInsts, OtherPredicates = [isGFX10Plus]
+} // End True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus
let True16Predicate = UseFakeTrue16Insts in {
def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_fake16_e64>;
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
index d9add27704d9d2..c87c8855f5cdc0 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
@@ -243,21 +243,18 @@
# W64-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
# W32-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
# W32-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
# W64-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
# W64-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00]
0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00
-# GFX11: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
# W32-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
# W32-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
# W64-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
# W64-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00]
0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00
-# GFX11: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
# W32-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
# W32-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
# W64-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
@@ -399,21 +396,18 @@
# W64-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
# W32-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
# W32-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
# W64-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
# W64-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00]
0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00
-# GFX11: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
# W32-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
# W32-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
# W64-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
# W64-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00]
0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00
-# GFX11: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
# W32-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
# W32-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
# W64-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
@@ -6019,21 +6013,18 @@
# W64-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
# W32-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
# W32-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
# W64-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
# W64-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00]
0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00
-# GFX11: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
# W32-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
# W32-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
# W64-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
# W64-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00]
0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00
-# GFX11: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
# W32-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
# W32-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
# W64-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
@@ -6175,21 +6166,18 @@
# W64-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
# W32-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
# W32-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
# W64-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
# W64-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00]
0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00
-# GFX11: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
# W32-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
# W32-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
# W64-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
# W64-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00]
0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00
-# GFX11: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
# W32-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
# W32-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
# W64-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
index 5815a6be79954d..5081b9811e43ef 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
@@ -4223,28 +4223,24 @@
# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX11: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
@@ -4359,28 +4355,24 @@
# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX11: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
@@ -5296,28 +5288,24 @@
# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX11: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
@@ -5432,28 +5420,24 @@
# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
-# GFX11: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
index 8636891fca9483..77f05027d1cfd2 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
@@ -2447,28 +2447,24 @@
# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX11: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
@@ -2529,28 +2525,24 @@
# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX11: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
@@ -3301,28 +3293,24 @@
# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX11: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
@@ -3383,28 +3371,24 @@
# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
-# GFX11: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
More information about the llvm-commits
mailing list