[llvm] 8a0c2e7 - [AMDGPU][True16][MC][CodeGen] true16 for v_cndmask_b16 (#119736)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 16 14:18:32 PST 2025
Author: Brox Chen
Date: 2025-01-16T17:18:28-05:00
New Revision: 8a0c2e75678a4d1d479676217db622d1981c18d3
URL: https://github.com/llvm/llvm-project/commit/8a0c2e75678a4d1d479676217db622d1981c18d3
DIFF: https://github.com/llvm/llvm-project/commit/8a0c2e75678a4d1d479676217db622d1981c18d3.diff
LOG: [AMDGPU][True16][MC][CodeGen] true16 for v_cndmask_b16 (#119736)
Support true16 format for v_cndmask_b16 in MC and CodeGen in true16 and
fake16 flow.
Since we are replacing `v_cndmask_b16` to `v_cndmask_b16_t16/fake16`, we
have to at least update the fake16 codeGen to get codeGen test passing.
For this case, we have to update the true16 and with fake16 together,
otherwise some of the true16 tests will fail
Added:
Modified:
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/test/CodeGen/AMDGPU/bf16.ll
llvm/test/CodeGen/AMDGPU/v_cndmask.ll
llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir
llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 5207201e14c091..6baef137df5e16 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -3007,8 +3007,8 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
switch (I.getOpcode()) {
case AMDGPU::V_ADDC_U32_e32:
case AMDGPU::V_ADDC_U32_dpp:
- case AMDGPU::V_CNDMASK_B16_e32:
- case AMDGPU::V_CNDMASK_B16_dpp:
+ case AMDGPU::V_CNDMASK_B16_fake16_e32:
+ case AMDGPU::V_CNDMASK_B16_fake16_dpp:
case AMDGPU::V_CNDMASK_B32_e32:
case AMDGPU::V_CNDMASK_B32_dpp:
case AMDGPU::V_DIV_FMAS_F32_e64:
@@ -3023,8 +3023,8 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
HazardReg == AMDGPU::VCC_HI;
case AMDGPU::V_ADDC_U32_e64:
case AMDGPU::V_ADDC_U32_e64_dpp:
- case AMDGPU::V_CNDMASK_B16_e64:
- case AMDGPU::V_CNDMASK_B16_e64_dpp:
+ case AMDGPU::V_CNDMASK_B16_fake16_e64:
+ case AMDGPU::V_CNDMASK_B16_fake16_e64_dpp:
case AMDGPU::V_CNDMASK_B32_e64:
case AMDGPU::V_CNDMASK_B32_e64_dpp:
case AMDGPU::V_SUBB_U32_e64:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index cdc1132579d8d8..1abbf4c217a697 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1245,11 +1245,22 @@ class VOPSelectPat <ValueType vt> : GCNPat <
(vt (select i1:$src0, vt:$src1, vt:$src2)),
(V_CNDMASK_B32_e64 0, VSrc_b32:$src2, 0, VSrc_b32:$src1, SSrc_i1:$src0)
>;
+class VOPSelectPat_t16 <ValueType vt> : GCNPat <
+ (vt (select i1:$src0, vt:$src1, vt:$src2)),
+ (V_CNDMASK_B16_t16_e64 0, VSrcT_b16:$src2, 0, VSrcT_b16:$src1, SSrc_i1:$src0)
+>;
def : VOPSelectModsPat <i32>;
def : VOPSelectModsPat <f32>;
-def : VOPSelectPat <f16>;
-def : VOPSelectPat <i16>;
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let True16Predicate = p in {
+ def : VOPSelectPat <f16>;
+ def : VOPSelectPat <i16>;
+} // End True16Predicate = p
+let True16Predicate = UseRealTrue16Insts in {
+ def : VOPSelectPat_t16 <f16>;
+ def : VOPSelectPat_t16 <i16>;
+} // End True16Predicate = UseRealTrue16Insts
let AddedComplexity = 1 in {
def : GCNPat <
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 6bbf19179b7f6c..900c91731aa1be 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -714,6 +714,26 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;
// V_CNDMASK_B16 is VOP3 only
+def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
+ let IsTrue16 = 1;
+ let IsRealTrue16 = 1;
+ let HasOpSel = 1;
+ let DstRC64 = getVALUDstForVT<DstVT, 1, 1>.ret;
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
+ let Src0Mod = getSrc0Mod<f16, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1Mod = getSrcMod<f16, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let HasSrc2Mods = 0;
+ let InsVOP3OpSel = getInsVOP3Base<Src0RC64, Src1RC64,
+ Src2RC64, NumSrcArgs,
+ HasClamp, 1/*HasModifiers*/, 0/*HasSrc2Mods*/, HasOMod,
+ Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/>.ret;
+ let Src0VOP3DPP = VGPRSrc_16;
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 0/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 0/*IsFake16*/>.ret;
+}
def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
let IsTrue16 = 1;
let DstRC64 = getVALUDstForVT<DstVT>.ret;
@@ -765,8 +785,10 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
// VOP2 Instructions
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isGFX11Plus in
-defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1_fake16>;
+let SubtargetPredicate = isGFX11Plus, True16Predicate = UseRealTrue16Insts in
+defm V_CNDMASK_B16_t16 : VOP2eInst <"v_cndmask_b16_t16", VOP2e_I16_I16_I16_I1_true16>;
+let SubtargetPredicate = isGFX11Plus, True16Predicate = UseFakeTrue16Insts in
+defm V_CNDMASK_B16_fake16 : VOP2eInst <"v_cndmask_b16_fake16", VOP2e_I16_I16_I16_I1_fake16>;
defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
@@ -1846,7 +1868,7 @@ defm V_FMAMK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x037
defm V_FMAAK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x038, "v_fmaak_f16">;
// VOP3 only.
-defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11_gfx12<0x25d>;
+defm V_CNDMASK_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x25d, "v_cndmask_b16">;
defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11_gfx12<0x31c>;
defm V_BFM_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31d>;
defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31e>;
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index bc359d6ff3aaa0..8e3c905b0eae5b 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -34508,14 +34508,25 @@ define bfloat @v_select_bf16(i1 %cond, bfloat %a, bfloat %b) {
; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_select_bf16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_select_bf16:
+; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v0
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
+; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_select_bf16:
+; GFX11FAKE16: ; %bb.0:
+; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
%op = select i1 %cond, bfloat %a, bfloat %b
ret bfloat %op
}
@@ -34573,11 +34584,14 @@ define bfloat @v_select_fneg_lhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
; GFX11TRUE16-LABEL: v_select_fneg_lhs_bf16:
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11TRUE16-NEXT: v_xor_b16 v1.l, 0x8000, v1.l
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v0
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.h, v0.l, vcc_lo
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_select_fneg_lhs_bf16:
@@ -34647,11 +34661,14 @@ define bfloat @v_select_fneg_rhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
; GFX11TRUE16-LABEL: v_select_fneg_rhs_bf16:
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11TRUE16-NEXT: v_xor_b16 v2.l, 0x8000, v2.l
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v0
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_select_fneg_rhs_bf16:
@@ -34749,11 +34766,15 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v1
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11TRUE16-NEXT: v_dual_cndmask_b32 v0, v2, v1 :: v_dual_cndmask_b32 v1, v3, v4
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v4.l, v3.l, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v2.l, v1.l, vcc_lo
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -34856,14 +34877,19 @@ define <2 x bfloat> @v_vselect_v2bf16(<2 x i1> %cond, <2 x bfloat> %a, <2 x bflo
; GFX11TRUE16-LABEL: v_vselect_v2bf16:
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1
; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11TRUE16-NEXT: v_dual_cndmask_b32 v0, v3, v2 :: v_dual_and_b32 v1, 1, v1
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc_lo
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v0
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v5.l, v4.l, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, v2.l, s0
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -34936,16 +34962,27 @@ define amdgpu_ps i32 @s_select_bf16(bfloat inreg %a, bfloat inreg %b, i32 %c) {
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
-; GFX11-LABEL: s_select_bf16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_mov_b32_e32 v1, s0
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, s1, v1, vcc_lo
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: ; return to shader part epilog
+; GFX11TRUE16-LABEL: s_select_bf16:
+; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, s1, v0.l, vcc_lo
+; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11FAKE16-LABEL: s_select_bf16:
+; GFX11FAKE16: ; %bb.0:
+; GFX11FAKE16-NEXT: v_mov_b32_e32 v1, s0
+; GFX11FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11FAKE16-NEXT: v_cndmask_b32_e32 v0, s1, v1, vcc_lo
+; GFX11FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11FAKE16-NEXT: ; return to shader part epilog
%cond = icmp eq i32 %c, 0
%op = select i1 %cond, bfloat %a, bfloat %b
%cast = bitcast bfloat %op to i16
@@ -35038,17 +35075,21 @@ define amdgpu_ps i32 @s_select_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
;
; GFX11TRUE16-LABEL: s_select_v2bf16:
; GFX11TRUE16: ; %bb.0:
-; GFX11TRUE16-NEXT: s_lshr_b32 s2, s1, 16
-; GFX11TRUE16-NEXT: s_lshr_b32 s3, s0, 16
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s2
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, s3
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, s1
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, s0
+; GFX11TRUE16-NEXT: s_lshr_b32 s2, s0, 16
+; GFX11TRUE16-NEXT: s_lshr_b32 s3, s1, 16
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, v3, v4
-; GFX11TRUE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s3
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s2
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s1
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s0
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v1.l, v1.h, vcc_lo
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11TRUE16-NEXT: ; return to shader part epilog
;
@@ -35156,17 +35197,20 @@ define amdgpu_ps i32 @s_vselect_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
;
; GFX11TRUE16-LABEL: s_vselect_v2bf16:
; GFX11TRUE16: ; %bb.0:
-; GFX11TRUE16-NEXT: s_lshr_b32 s2, s1, 16
-; GFX11TRUE16-NEXT: s_lshr_b32 s3, s0, 16
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, s2
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, s3
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, s1
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, s0
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo
+; GFX11TRUE16-NEXT: s_lshr_b32 s3, s1, 16
+; GFX11TRUE16-NEXT: s_lshr_b32 s4, s0, 16
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 0, v1
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s3
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s4
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s1
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s0
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, s2
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v1.l, v1.h, vcc_lo
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
@@ -36876,33 +36920,38 @@ define amdgpu_ps <2 x i32> @s_vselect_v4bf16(<4 x bfloat> inreg %a, <4 x bfloat>
;
; GFX11TRUE16-LABEL: s_vselect_v4bf16:
; GFX11TRUE16: ; %bb.0:
-; GFX11TRUE16-NEXT: s_lshr_b32 s4, s3, 16
-; GFX11TRUE16-NEXT: s_lshr_b32 s5, s1, 16
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, s4
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, s5
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX11TRUE16-NEXT: s_lshr_b32 s4, s0, 16
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v7.l, s2
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v8.l, s0
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v6.l, s4
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc_lo
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, s3
-; GFX11TRUE16-NEXT: s_lshr_b32 s3, s2, 16
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, s3
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v9.l, s1
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11TRUE16-NEXT: s_lshr_b32 s7, s3, 16
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v7, v8, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v4, v9, vcc_lo
-; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s4, 0, v1
+; GFX11TRUE16-NEXT: s_lshr_b32 s8, s1, 16
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s7
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s3
+; GFX11TRUE16-NEXT: s_lshr_b32 s3, s2, 16
+; GFX11TRUE16-NEXT: s_lshr_b32 s7, s0, 16
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s5, 0, v2
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s6, 0, v3
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s8
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s3
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, s7
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.h, s2
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, s0
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.h, s1
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, s6
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.l, s4
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, v2.h, v3.l, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, v1.l, v3.h, s5
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.h
+; GFX11TRUE16-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_perm_b32 v0, v0, v3, 0x5040100
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s1, v1
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11TRUE16-NEXT: ; return to shader part epilog
;
; GFX11FAKE16-LABEL: s_vselect_v4bf16:
@@ -37078,29 +37127,33 @@ define <4 x bfloat> @v_vselect_v4bf16(<4 x i1> %cond, <4 x bfloat> %a, <4 x bflo
; GFX11TRUE16-LABEL: v_vselect_v4bf16:
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v8.l, v7.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v9.l, v5.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v8, v9, vcc_lo
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4)
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v6
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v4
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc_lo
+; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v7
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v0
+; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v2
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc_lo
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s1, 1, v3
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v4
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v6
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v5
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 1, v0
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v6.l, v4.l, s0
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, v8.l, v3.l, s1
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, v7.l, v5.l, s2
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_vselect_v4bf16:
@@ -37368,51 +37421,51 @@ define <8 x bfloat> @v_vselect_v8bf16(<8 x i1> %cond, <8 x bfloat> %a, <8 x bflo
; GFX11TRUE16-LABEL: v_vselect_v8bf16:
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v16.l, v15.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v17.l, v11.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v11, 16, v11
-; GFX11TRUE16-NEXT: v_and_b32_e32 v6, 1, v6
-; GFX11TRUE16-NEXT: v_and_b32_e32 v4, 1, v4
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v6, v16, v17, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v14
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v17, 16, v10
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v4, v14, v10, vcc_lo
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v10.l, v13.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v14.l, v9.l
-; GFX11TRUE16-NEXT: v_and_b32_e32 v5, 1, v5
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v13, 16, v13
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v9
-; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v5, v16, v17, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v10, v14, vcc_lo
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11TRUE16-NEXT: v_and_b32_e32 v7, 1, v7
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v12
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v14, 16, v8
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v12, v8, vcc_lo
+; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX11TRUE16-NEXT: v_and_b32_e32 v4, 1, v4
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v10, v14, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v13, v9, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v7, v15, v11, vcc_lo
-; GFX11TRUE16-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v0
+; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v7
+; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v6
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s1, 1, v3
+; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v5
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v15
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 1, v0
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s3, 1, v1
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v11
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s4, 1, v4
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s5, 1, v2
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s6, 1, v3
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v8
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v5.l, v1.l, s2
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v12
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v9
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v13
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v10
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v14
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v15.l, v11.l, s3
+; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, v14.l, v10.l, s4
+; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, v3.l, v2.l, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b16 v2.l, v12.l, v8.l, s0
+; GFX11TRUE16-NEXT: v_cndmask_b16 v2.h, v5.l, v4.l, s1
+; GFX11TRUE16-NEXT: v_cndmask_b16 v3.l, v13.l, v9.l, s5
+; GFX11TRUE16-NEXT: v_cndmask_b16 v3.h, v7.l, v6.l, s6
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v2.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v6.l, v3.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v3.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v7.l, v1.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v8.l, v0.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v9.l, v0.l
+; GFX11TRUE16-NEXT: v_perm_b32 v0, v4, v5, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v1, v2, v6, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v2, v3, v7, 0x5040100
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11TRUE16-NEXT: v_perm_b32 v3, v8, v9, 0x5040100
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_vselect_v8bf16:
@@ -38024,101 +38077,96 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11TRUE16-NEXT: scratch_load_b32 v31, off, s32
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v49.l, v26.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v50.l, v18.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v26
-; GFX11TRUE16-NEXT: v_and_b32_e32 v12, 1, v12
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v33.l, v30.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v34.l, v22.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v53.l, v24.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v54.l, v16.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v16
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24
-; GFX11TRUE16-NEXT: v_and_b32_e32 v10, 1, v10
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
-; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v35.l, v29.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v36.l, v21.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v51.l, v25.l
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v12, v33, v34, vcc_lo
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v52.l, v17.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v17, 16, v17
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v25
+; GFX11TRUE16-NEXT: v_and_b32_e32 v9, 1, v9
; GFX11TRUE16-NEXT: v_and_b32_e32 v8, 1, v8
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10
-; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v37.l, v28.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v38.l, v20.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v20
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v10, v35, v36, vcc_lo
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v28
+; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX11TRUE16-NEXT: v_and_b32_e32 v7, 1, v7
; GFX11TRUE16-NEXT: v_and_b32_e32 v6, 1, v6
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v20
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v28
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s7, 1, v9
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s8, 1, v8
+; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3
; GFX11TRUE16-NEXT: v_and_b32_e32 v5, 1, v5
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v39.l, v27.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v48.l, v19.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v19
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v8, v37, v38, vcc_lo
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27
; GFX11TRUE16-NEXT: v_and_b32_e32 v4, 1, v4
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX11TRUE16-NEXT: v_and_b32_e32 v7, 1, v7
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v29
-; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v6, v39, v48, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX11TRUE16-NEXT: v_and_b32_e32 v9, 1, v9
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v22
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v30
-; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v4, v49, v50, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
; GFX11TRUE16-NEXT: v_and_b32_e32 v11, 1, v11
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v32.l, v23.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v23
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v51, v52, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11TRUE16-NEXT: v_and_b32_e32 v10, 1, v10
; GFX11TRUE16-NEXT: v_and_b32_e32 v13, 1, v13
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v53, v54, vcc_lo
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v13, v30, v22, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v11, v29, v21, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v9, v28, v20, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v7, v27, v19, vcc_lo
+; GFX11TRUE16-NEXT: v_and_b32_e32 v12, 1, v12
+; GFX11TRUE16-NEXT: v_and_b32_e32 v15, 1, v15
+; GFX11TRUE16-NEXT: v_and_b32_e32 v14, 1, v14
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v19
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v27
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v16
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v24
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v24, v16, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v25, v17, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX11TRUE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v5, v26, v18, vcc_lo
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v4, v9, v8, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v5, v11, v10, 0x5040100
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v0
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 1, v2
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s5, 1, v7
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s6, 1, v6
+; GFX11TRUE16-NEXT: v_cndmask_b16 v2.l, v28.l, v20.l, s8
+; GFX11TRUE16-NEXT: v_cndmask_b16 v2.h, v38.l, v37.l, s7
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v23
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v22
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v30
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v21
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v29
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v18
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v26
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v17
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v25
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s1, 1, v3
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s3, 1, v5
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s4, 1, v4
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s9, 1, v11
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s10, 1, v12
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s11, 1, v13
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s12, 1, v10
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s13, 1, v15
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s14, 1, v14
+; GFX11TRUE16-NEXT: v_cndmask_b16 v3.l, v27.l, v19.l, s6
+; GFX11TRUE16-NEXT: v_cndmask_b16 v3.h, v48.l, v39.l, s5
+; GFX11TRUE16-NEXT: v_cndmask_b16 v4.h, v54.l, v53.l, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b16 v5.l, v24.l, v16.l, s0
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v12.l, v2.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v13.l, v2.l
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v30.l, v22.l, s10
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v34.l, v33.l, s11
+; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, v29.l, v21.l, s12
+; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, v36.l, v35.l, s9
+; GFX11TRUE16-NEXT: v_cndmask_b16 v5.h, v52.l, v51.l, s1
+; GFX11TRUE16-NEXT: v_cndmask_b16 v6.l, v25.l, v17.l, s2
+; GFX11TRUE16-NEXT: v_cndmask_b16 v6.h, v50.l, v49.l, s3
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v8.l, v5.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v10.l, v3.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v11.l, v3.l
+; GFX11TRUE16-NEXT: v_cndmask_b16 v4.l, v26.l, v18.l, s4
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, v5.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v9.l, v6.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v6.l, v6.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v14.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v15.l, v1.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v16.l, v0.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l
+; GFX11TRUE16-NEXT: v_perm_b32 v0, v7, v8, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v1, v5, v9, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v5, v14, v15, 0x5040100
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v31
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v17.l, v31.l
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v16.l
-; GFX11TRUE16-NEXT: v_and_b32_e32 v14, 1, v14
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_dual_cndmask_b32 v14, v17, v32 :: v_dual_and_b32 v15, 1, v15
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v15, v3, v23, vcc_lo
-; GFX11TRUE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v6, v13, v12, 0x5040100
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11TRUE16-NEXT: v_perm_b32 v7, v15, v14, 0x5040100
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v31
+; GFX11TRUE16-NEXT: v_cndmask_b16 v3.l, v31.l, v23.l, s14
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_cndmask_b16 v3.h, v2.l, v32.l, s13
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v7.l, v3.l
+; GFX11TRUE16-NEXT: v_perm_b32 v2, v6, v4, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v4, v12, v13, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v6, v16, v17, 0x5040100
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v8.l, v3.h
+; GFX11TRUE16-NEXT: v_perm_b32 v3, v10, v11, 0x5040100
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_perm_b32 v7, v8, v7, 0x5040100
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_vselect_v16bf16:
@@ -39660,217 +39708,197 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX11TRUE16-NEXT: scratch_load_b32 v85, off, s32 offset:8
; GFX11TRUE16-NEXT: scratch_load_b32 v86, off, s32 offset:68
; GFX11TRUE16-NEXT: scratch_load_b32 v87, off, s32 offset:4
+; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11TRUE16-NEXT: v_and_b32_e32 v8, 1, v8
+; GFX11TRUE16-NEXT: v_and_b32_e32 v22, 1, v22
+; GFX11TRUE16-NEXT: v_and_b32_e32 v24, 1, v24
+; GFX11TRUE16-NEXT: v_and_b32_e32 v26, 1, v26
+; GFX11TRUE16-NEXT: v_and_b32_e32 v28, 1, v28
+; GFX11TRUE16-NEXT: v_and_b32_e32 v30, 1, v30
+; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX11TRUE16-NEXT: v_and_b32_e32 v5, 1, v5
+; GFX11TRUE16-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX11TRUE16-NEXT: v_and_b32_e32 v7, 1, v7
+; GFX11TRUE16-NEXT: v_and_b32_e32 v9, 1, v9
+; GFX11TRUE16-NEXT: v_and_b32_e32 v11, 1, v11
+; GFX11TRUE16-NEXT: v_and_b32_e32 v10, 1, v10
+; GFX11TRUE16-NEXT: v_and_b32_e32 v13, 1, v13
+; GFX11TRUE16-NEXT: v_and_b32_e32 v12, 1, v12
+; GFX11TRUE16-NEXT: v_and_b32_e32 v15, 1, v15
+; GFX11TRUE16-NEXT: v_and_b32_e32 v14, 1, v14
+; GFX11TRUE16-NEXT: v_and_b32_e32 v17, 1, v17
+; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
+; GFX11TRUE16-NEXT: v_and_b32_e32 v19, 1, v19
+; GFX11TRUE16-NEXT: v_and_b32_e32 v18, 1, v18
+; GFX11TRUE16-NEXT: v_and_b32_e32 v21, 1, v21
+; GFX11TRUE16-NEXT: v_and_b32_e32 v20, 1, v20
+; GFX11TRUE16-NEXT: v_and_b32_e32 v23, 1, v23
+; GFX11TRUE16-NEXT: v_and_b32_e32 v25, 1, v25
+; GFX11TRUE16-NEXT: v_and_b32_e32 v27, 1, v27
+; GFX11TRUE16-NEXT: v_and_b32_e32 v29, 1, v29
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v0
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s8, 1, v8
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s22, 1, v22
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s24, 1, v24
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s26, 1, v30
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s27, 1, v26
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s29, 1, v28
+; GFX11TRUE16-NEXT: v_and_b32_e32 v6, 1, v6
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s1, 1, v3
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 1, v2
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s3, 1, v5
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s4, 1, v4
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s5, 1, v7
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s7, 1, v9
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s9, 1, v11
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s10, 1, v10
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s11, 1, v13
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s12, 1, v12
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s13, 1, v15
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s14, 1, v14
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s15, 1, v17
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s16, 1, v16
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s17, 1, v19
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s18, 1, v18
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s19, 1, v21
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s20, 1, v20
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s21, 1, v23
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s23, 1, v25
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s25, 1, v27
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s28, 1, v29
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s6, 1, v6
+; GFX11TRUE16-NEXT: s_waitcnt vmcnt(32)
+; GFX11TRUE16-NEXT: v_and_b32_e32 v8, 1, v31
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(31)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v96.l, v32.l
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v17, 16, v32
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(30)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v97.l, v33.l
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(29)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v98.l, v34.l
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v32.l, v33.l, s26
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v33
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(28)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v99.l, v35.l
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(27)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v100.l, v36.l
+; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v34.l, v35.l, s29
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v35
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v34
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(26)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v101.l, v37.l
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(25)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v102.l, v38.l
+; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, v36.l, v37.l, s27
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v11, 16, v37
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v12, 16, v36
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(24)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v103.l, v39.l
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(23)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v112.l, v48.l
+; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, v38.l, v39.l, s24
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v13, 16, v39
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v14, 16, v38
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(22)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v113.l, v49.l
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(21)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v114.l, v50.l
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(20)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v115.l, v51.l
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(19)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v116.l, v52.l
+; GFX11TRUE16-NEXT: v_cndmask_b16 v2.l, v48.l, v49.l, s22
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v15, 16, v49
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v48
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(18)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v117.l, v53.l
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(17)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v118.l, v54.l
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(16)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v119.l, v55.l
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(15)
-; GFX11TRUE16-NEXT: v_mov_b16_e64 v128.l, v64.l
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v53
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v52
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(14)
-; GFX11TRUE16-NEXT: v_mov_b16_e64 v129.l, v65.l
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(13)
-; GFX11TRUE16-NEXT: v_mov_b16_e64 v130.l, v66.l
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(12)
-; GFX11TRUE16-NEXT: v_mov_b16_e64 v131.l, v67.l
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v65
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v64
+; GFX11TRUE16-NEXT: v_cndmask_b16 v2.h, v50.l, v51.l, s20
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(11)
-; GFX11TRUE16-NEXT: v_mov_b16_e64 v132.l, v68.l
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v68
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(10)
-; GFX11TRUE16-NEXT: v_mov_b16_e64 v133.l, v69.l
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v69
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(9)
-; GFX11TRUE16-NEXT: v_mov_b16_e64 v134.l, v70.l
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v70
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(8)
-; GFX11TRUE16-NEXT: v_mov_b16_e64 v135.l, v71.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v71, 16, v71
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v70, 16, v70
-; GFX11TRUE16-NEXT: v_and_b32_e32 v30, 1, v30
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v71
+; GFX11TRUE16-NEXT: s_waitcnt vmcnt(7)
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v80
+; GFX11TRUE16-NEXT: s_waitcnt vmcnt(6)
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v81
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(5)
-; GFX11TRUE16-NEXT: v_mov_b16_e64 v146.l, v82.l
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v82
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(4)
-; GFX11TRUE16-NEXT: v_mov_b16_e64 v147.l, v83.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v83, 16, v83
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v82, 16, v82
-; GFX11TRUE16-NEXT: v_and_b32_e32 v28, 1, v28
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v30
-; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v83
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(3)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v30.l, v84.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v84, 16, v84
-; GFX11TRUE16-NEXT: v_and_b32_e32 v26, 1, v26
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v96, v96, v97, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v28
-; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v84
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(2)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v97.l, v85.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v85, 16, v85
-; GFX11TRUE16-NEXT: v_and_b32_e32 v24, 1, v24
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v98, v98, v99, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v26
-; GFX11TRUE16-NEXT: v_and_b32_e32 v7, 1, v7
-; GFX11TRUE16-NEXT: v_mov_b16_e64 v144.l, v80.l
-; GFX11TRUE16-NEXT: v_mov_b16_e64 v145.l, v81.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v81, 16, v81
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v26, v100, v101, vcc_lo
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v80, 16, v80
-; GFX11TRUE16-NEXT: v_and_b32_e32 v22, 1, v22
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v24
-; GFX11TRUE16-NEXT: v_and_b32_e32 v5, 1, v5
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v69
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v68, 16, v68
-; GFX11TRUE16-NEXT: v_and_b32_e32 v20, 1, v20
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v24, v102, v103, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v22
-; GFX11TRUE16-NEXT: v_and_b32_e32 v11, 1, v11
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v67
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v66
-; GFX11TRUE16-NEXT: v_and_b32_e32 v18, 1, v18
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v22, v112, v113, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v20
-; GFX11TRUE16-NEXT: v_and_b32_e32 v9, 1, v9
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v65
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v64
-; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v20, v114, v115, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v18
-; GFX11TRUE16-NEXT: v_and_b32_e32 v15, 1, v15
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v55
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v54
-; GFX11TRUE16-NEXT: v_and_b32_e32 v14, 1, v14
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v18, v116, v117, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
-; GFX11TRUE16-NEXT: v_and_b32_e32 v13, 1, v13
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v53
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v52
-; GFX11TRUE16-NEXT: v_and_b32_e32 v12, 1, v12
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v16, v118, v119, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
-; GFX11TRUE16-NEXT: v_and_b32_e32 v19, 1, v19
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v51
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v50
-; GFX11TRUE16-NEXT: v_and_b32_e32 v10, 1, v10
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v14, v128, v129, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
-; GFX11TRUE16-NEXT: v_and_b32_e32 v17, 1, v17
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v49
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v48
-; GFX11TRUE16-NEXT: v_and_b32_e32 v8, 1, v8
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v12, v130, v131, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10
-; GFX11TRUE16-NEXT: v_and_b32_e32 v23, 1, v23
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v39
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v38
-; GFX11TRUE16-NEXT: v_and_b32_e32 v6, 1, v6
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v10, v132, v133, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
-; GFX11TRUE16-NEXT: v_and_b32_e32 v21, 1, v21
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v37
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36
-; GFX11TRUE16-NEXT: v_and_b32_e32 v4, 1, v4
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v8, v134, v135, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX11TRUE16-NEXT: v_and_b32_e32 v27, 1, v27
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v35
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34
-; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v6, v144, v145, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX11TRUE16-NEXT: v_and_b32_e32 v25, 1, v25
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32
-; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v4, v146, v147, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX11TRUE16-NEXT: v_and_b32_e32 v31, 1, v31
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v85
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(1)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v28.l, v86.l
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v86
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v99.l, v87.l
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v87, 16, v87
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v30, v97, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11TRUE16-NEXT: v_and_b32_e32 v29, 1, v29
-; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v86, 16, v86
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v30.l, v84.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v84.l, v85.l
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v28, v99, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v31
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v28.l, v86.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v85.l, v87.l
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v31, v32, v33, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v29
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v29, v34, v35, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v27
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v27, v36, v37, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v25
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v25, v38, v39, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v23
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v23, v48, v49, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v21
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v21, v50, v51, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v19
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v19, v52, v53, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v17
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v17, v54, v55, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v15, v64, v65, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v13, v66, v67, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v11, v68, v69, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v7, v80, v81, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v30, v84, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v28, v85, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v5, v82, v83, vcc_lo
-; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
-; GFX11TRUE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v6, v13, v12, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
-; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v9, v70, v71, vcc_lo
-; GFX11TRUE16-NEXT: v_perm_b32 v5, v11, v10, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v7, v15, v14, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v10, v21, v20, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v11, v23, v22, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v4, v9, v8, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v8, v17, v16, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v9, v19, v18, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v12, v25, v24, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v13, v27, v26, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v14, v29, v98, 0x5040100
-; GFX11TRUE16-NEXT: v_perm_b32 v15, v31, v96, 0x5040100
+; GFX11TRUE16-NEXT: v_cndmask_b16 v7.h, v86.l, v87.l, s0
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v87
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v8
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v51
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v50
+; GFX11TRUE16-NEXT: v_cndmask_b16 v3.l, v52.l, v53.l, s18
+; GFX11TRUE16-NEXT: v_cndmask_b16 v3.h, v54.l, v55.l, s16
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v55
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v54
+; GFX11TRUE16-NEXT: v_cndmask_b16 v4.l, v64.l, v65.l, s14
+; GFX11TRUE16-NEXT: v_cndmask_b16 v4.h, v66.l, v67.l, s12
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v67
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v66
+; GFX11TRUE16-NEXT: v_cndmask_b16 v5.h, v70.l, v71.l, s8
+; GFX11TRUE16-NEXT: v_cndmask_b16 v6.h, v82.l, v83.l, s4
+; GFX11TRUE16-NEXT: v_cndmask_b16 v8.l, v10.l, v9.l, s28
+; GFX11TRUE16-NEXT: v_cndmask_b16 v8.h, v12.l, v11.l, s25
+; GFX11TRUE16-NEXT: v_cndmask_b16 v9.l, v14.l, v13.l, s23
+; GFX11TRUE16-NEXT: v_cndmask_b16 v9.h, v18.l, v15.l, s21
+; GFX11TRUE16-NEXT: v_cndmask_b16 v10.h, v22.l, v21.l, s17
+; GFX11TRUE16-NEXT: v_cndmask_b16 v11.h, v26.l, v25.l, s13
+; GFX11TRUE16-NEXT: v_cndmask_b16 v12.h, v30.l, v29.l, s9
+; GFX11TRUE16-NEXT: v_cndmask_b16 v13.l, v32.l, v31.l, s7
+; GFX11TRUE16-NEXT: v_cndmask_b16 v13.h, v34.l, v33.l, s5
+; GFX11TRUE16-NEXT: v_cndmask_b16 v14.l, v36.l, v35.l, s3
+; GFX11TRUE16-NEXT: v_cndmask_b16 v14.h, v38.l, v37.l, s1
+; GFX11TRUE16-NEXT: v_cndmask_b16 v15.l, v48.l, v39.l, vcc_lo
+; GFX11TRUE16-NEXT: v_cndmask_b16 v15.h, v17.l, v16.l, s0
+; GFX11TRUE16-NEXT: v_cndmask_b16 v5.l, v68.l, v69.l, s10
+; GFX11TRUE16-NEXT: v_cndmask_b16 v6.l, v80.l, v81.l, s6
+; GFX11TRUE16-NEXT: v_cndmask_b16 v7.l, v84.l, v85.l, s2
+; GFX11TRUE16-NEXT: v_cndmask_b16 v10.l, v20.l, v19.l, s19
+; GFX11TRUE16-NEXT: v_cndmask_b16 v11.l, v24.l, v23.l, s15
+; GFX11TRUE16-NEXT: v_cndmask_b16 v12.l, v28.l, v27.l, s11
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v18.l, v7.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v19.l, v6.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v20.l, v5.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v21.l, v4.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v22.l, v4.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v23.l, v3.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v24.l, v3.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v25.l, v2.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v26.l, v2.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v27.l, v1.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v28.l, v1.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v29.l, v0.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v30.l, v0.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v15.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v14.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v13.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, v13.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v13.l, v12.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v14.l, v11.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v16.l, v10.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v17.l, v9.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v31.l, v9.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v32.l, v8.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v33.l, v8.l
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v15.l, v15.h
+; GFX11TRUE16-NEXT: v_perm_b32 v0, v0, v18, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v1, v1, v7, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v2, v2, v19, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v3, v3, v6, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v4, v4, v20, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v5, v13, v5, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v6, v12, v21, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v7, v14, v22, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v8, v11, v23, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v9, v16, v24, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v10, v10, v25, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v11, v17, v26, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v12, v31, v27, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v13, v32, v28, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v14, v33, v29, 0x5040100
+; GFX11TRUE16-NEXT: v_perm_b32 v15, v15, v30, 0x5040100
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_vselect_v32bf16:
diff --git a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
index f20c1ccb2d63eb..c6cc479b5deb1e 100644
--- a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
@@ -3,6 +3,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX12 %s
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare half @llvm.fabs.f16(half)
@@ -90,6 +91,24 @@ define amdgpu_kernel void @v_cnd_nan_nosgpr(ptr addrspace(1) %out, i32 %c, ptr a
; GFX11-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_cnd_nan_nosgpr:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v0, v0, s[0:1]
+; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_cmp_eq_u32 s2, 0
+; GFX12-NEXT: s_cselect_b64 vcc, -1, 0
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
+; GFX12-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX12-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workitem.id.x() #1
%f.gep = getelementptr float, ptr addrspace(1) %fptr, i32 %idx
%f = load float, ptr addrspace(1) %f.gep
@@ -155,6 +174,18 @@ define amdgpu_kernel void @v_cnd_nan(ptr addrspace(1) %out, i32 %c, float %f) #0
; GFX11-NEXT: v_cndmask_b32_e64 v1, -1, s3, s[4:5]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_cnd_nan:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_cmp_eq_u32 s2, 0
+; GFX12-NEXT: s_cselect_b32 s2, s3, -1
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT: v_mov_b32_e32 v1, s2
+; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-NEXT: s_endpgm
%setcc = icmp ne i32 %c, 0
%select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
store float %select, ptr addrspace(1) %out
@@ -220,6 +251,21 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, s1, s[4:5]
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_sgprX_k0_select_k1_sgprZ_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x4c
+; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_cmp_nlg_f32 s0, 0
+; GFX12-NEXT: s_cselect_b32 s0, s1, 1.0
+; GFX12-NEXT: v_mov_b32_e32 v1, s0
+; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
@@ -285,6 +331,19 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(ptr addrspace(1) %o
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, s6, s[2:3]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_sgprX_k0_select_k1_sgprX_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_cmp_nlg_f32 s2, 0
+; GFX12-NEXT: s_cselect_b32 s2, s2, 1.0
+; GFX12-NEXT: v_mov_b32_e32 v1, s2
+; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
@@ -350,6 +409,21 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(ptr addrspace(1) %o
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, s1, s[4:5]
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_sgprX_k0_select_k0_sgprZ_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x4c
+; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_cmp_nlg_f32 s0, 0
+; GFX12-NEXT: s_cselect_b32 s0, s1, 0
+; GFX12-NEXT: v_mov_b32_e32 v1, s0
+; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
@@ -415,6 +489,19 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(ptr addrspace(1) %o
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, s6, s[2:3]
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_sgprX_k0_select_k0_sgprX_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_cmp_nlg_f32 s2, 0
+; GFX12-NEXT: s_cselect_b32 s2, s2, 0
+; GFX12-NEXT: v_mov_b32_e32 v1, s2
+; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
@@ -498,6 +585,23 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(ptr addrspace(1) %o
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_sgprX_k0_select_k0_vgprZ_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v1, v0, s[0:1]
+; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_cmp_nlg_f32 s2, 0
+; GFX12-NEXT: s_cselect_b64 vcc, -1, 0
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext
@@ -583,6 +687,23 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %o
; GFX11-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_sgprX_k0_select_k1_vgprZ_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v1, v0, s[0:1]
+; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_cmp_nlg_f32 s2, 0
+; GFX12-NEXT: s_cselect_b64 vcc, -1, 0
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext
@@ -661,6 +782,21 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, s4, vcc
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_vgprX_k0_select_k1_sgprZ_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_load_b32 s4, s[4:5], 0x34
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
+; GFX12-NEXT: v_cndmask_b32_e64 v1, 1.0, s4, vcc
+; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
@@ -751,6 +887,24 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %o
; GFX11-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_vgprX_k0_select_k1_vgprZ_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_load_b32 v2, v0, s[4:5] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cmp_le_f32_e32 vcc, 0, v1
+; GFX12-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
@@ -843,6 +997,24 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(ptr addrspace(1) %o
; GFX11-NEXT: v_cndmask_b32_e32 v1, 2, v2, vcc
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: icmp_vgprX_k0_select_k1_vgprZ_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_load_b32 v2, v0, s[4:5] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cmp_lt_i32_e32 vcc, -1, v1
+; GFX12-NEXT: v_cndmask_b32_e32 v1, 2, v2, vcc
+; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds i32, ptr addrspace(1) %x.ptr, i64 %tid.ext
@@ -939,6 +1111,25 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(ptr addrspace(1) %o
; GFX11-NEXT: v_cndmask_b32_e32 v0, 2, v2, vcc
; GFX11-NEXT: global_store_b64 v4, v[0:1], s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: icmp_vgprX_k0_select_k1_vgprZ_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v4, 3, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b64 v[0:1], v4, s[2:3] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_load_b64 v[2:3], v4, s[4:5] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cmp_lt_i64_e32 vcc, -1, v[0:1]
+; GFX12-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX12-NEXT: v_cndmask_b32_e32 v0, 2, v2, vcc
+; GFX12-NEXT: global_store_b64 v4, v[0:1], s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds i64, ptr addrspace(1) %x.ptr, i64 %tid.ext
@@ -1048,6 +1239,28 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(ptr addrspace(1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_vgprX_k0_select_vgprZ_k1_v4f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX12-NEXT: v_lshlrev_b32_e32 v4, 4, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v5, v1, s[2:3] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_load_b128 v[0:3], v4, s[4:5] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cmp_nge_f32_e32 vcc, 4.0, v5
+; GFX12-NEXT: v_cndmask_b32_e32 v3, 4.0, v3, vcc
+; GFX12-NEXT: v_cndmask_b32_e32 v2, -0.5, v2, vcc
+; GFX12-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc
+; GFX12-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc
+; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
@@ -1157,6 +1370,28 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(ptr addrspace(1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_vgprX_k0_select_k1_vgprZ_v4f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX12-NEXT: v_lshlrev_b32_e32 v4, 4, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v5, v1, s[2:3] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_load_b128 v[0:3], v4, s[4:5] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cmp_ge_f32_e32 vcc, 4.0, v5
+; GFX12-NEXT: v_cndmask_b32_e32 v3, 4.0, v3, vcc
+; GFX12-NEXT: v_cndmask_b32_e32 v2, -0.5, v2, vcc
+; GFX12-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc
+; GFX12-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc
+; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
@@ -1268,6 +1503,28 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(ptr addrspace(1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_k0_vgprX_select_k1_vgprZ_v4f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX12-NEXT: v_lshlrev_b32_e32 v4, 4, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v5, v1, s[2:3] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_load_b128 v[0:3], v4, s[4:5] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cmp_le_f32_e32 vcc, 4.0, v5
+; GFX12-NEXT: v_cndmask_b32_e32 v3, 4.0, v3, vcc
+; GFX12-NEXT: v_cndmask_b32_e32 v2, -0.5, v2, vcc
+; GFX12-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc
+; GFX12-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc
+; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
@@ -1375,6 +1632,29 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(ptr addrspace(1) %ou
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; GFX11-NEXT: global_store_b8 v0, v1, s[8:9]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: icmp_vgprX_k0_select_k1_vgprZ_i1:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b128 s[8:11], s[4:5], 0x24
+; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v1, v1, s[10:11] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_load_u8 v2, v0, s[0:1] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1
+; GFX12-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v2
+; GFX12-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; GFX12-NEXT: global_store_b8 v0, v1, s[8:9]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds i32, ptr addrspace(1) %x.ptr, i64 %tid.ext
@@ -1479,6 +1759,26 @@ define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(ptr addrspace(1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_vgprX_k0_selectf64_k1_vgprZ_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX12-NEXT: v_lshlrev_b32_e32 v2, 3, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v3, v1, s[2:3] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_load_b64 v[0:1], v2, s[4:5] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cmp_le_f32_e32 vcc, 0, v3
+; GFX12-NEXT: v_cndmask_b32_e32 v1, 0x3ff00000, v1, vcc
+; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
@@ -1581,6 +1881,26 @@ define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(ptr addrspace(1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 3, v0, vcc
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_vgprX_k0_selecti64_k1_vgprZ_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GFX12-NEXT: v_lshlrev_b32_e32 v2, 3, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v3, v1, s[2:3] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_load_b64 v[0:1], v2, s[4:5] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v3
+; GFX12-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX12-NEXT: v_cndmask_b32_e32 v0, 3, v0, vcc
+; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
@@ -1674,6 +1994,24 @@ define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(ptr addrspace(1)
; GFX11-NEXT: v_cndmask_b32_e32 v1, 4.0, v2, vcc
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: icmp_vgprX_k0_selectf32_k1_vgprZ_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_load_b32 v2, v0, s[4:5] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cmp_gt_u32_e32 vcc, 2, v1
+; GFX12-NEXT: v_cndmask_b32_e32 v1, 4.0, v2, vcc
+; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds i32, ptr addrspace(1) %x.ptr, i64 %tid.ext
@@ -1783,6 +2121,28 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add
; GFX11-NEXT: global_store_b32 v0, v2, s[0:1] dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_load_b32 v2, v0, s[4:5] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cmp_nle_f32_e32 vcc, 4.0, v1
+; GFX12-NEXT: v_cndmask_b32_e64 v1, v2, -1.0, vcc
+; GFX12-NEXT: v_cndmask_b32_e64 v2, v2, -2.0, vcc
+; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_store_b32 v0, v2, s[0:1] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
@@ -1890,6 +2250,27 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f16(ptr addrspace(1) %out, i32 %c,
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX11-NEXT: global_store_b16 v2, v0, s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_cndmask_abs_neg_f16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: v_mov_b32_e32 v2, 0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_u16 v0, v0, s[0:1]
+; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_cmp_lg_u32 s2, 0
+; GFX12-NEXT: s_cselect_b64 vcc, -1, 0
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_and_b32_e32 v1, 0x7fff, v0
+; GFX12-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX12-NEXT: global_store_b16 v2, v0, s[0:1]
+; GFX12-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workitem.id.x() #1
%f.gep = getelementptr half, ptr addrspace(1) %fptr, i32 %idx
%f = load half, ptr addrspace(1) %f.gep
@@ -1981,6 +2362,24 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f32(ptr addrspace(1) %out, i32 %c,
; GFX11-NEXT: v_cndmask_b32_e64 v0, -v0, |v0|, s[2:3]
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_cndmask_abs_neg_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v0, v0, s[0:1]
+; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_cmp_lg_u32 s2, 0
+; GFX12-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_cndmask_b32_e64 v0, -v0, |v0|, s[2:3]
+; GFX12-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX12-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workitem.id.x() #1
%f.gep = getelementptr float, ptr addrspace(1) %fptr, i32 %idx
%f = load float, ptr addrspace(1) %f.gep
@@ -2086,6 +2485,28 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f64(ptr addrspace(1) %out, i32 %c,
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1]
; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_cndmask_abs_neg_f64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34
+; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT: v_mov_b32_e32 v3, 0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 3, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b64 v[0:1], v0, s[0:1]
+; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_cmp_lg_u32 s2, 0
+; GFX12-NEXT: s_cselect_b64 vcc, -1, 0
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_and_b32_e32 v2, 0x7fffffff, v1
+; GFX12-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX12-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX12-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workitem.id.x() #1
%f.gep = getelementptr double, ptr addrspace(1) %fptr, i32 %idx
%f = load double, ptr addrspace(1) %f.gep
diff --git a/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir
index c936c13ac6c66f..d91ee542159245 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir
@@ -159,16 +159,16 @@ name: mask_hazard_cndmask_dpp3
body: |
bb.0:
; GFX11-LABEL: name: mask_hazard_cndmask_dpp3
- ; GFX11: $vgpr0 = V_CNDMASK_B16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
+ ; GFX11: $vgpr0 = V_CNDMASK_B16_fake16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
; GFX11-NEXT: S_ENDPGM 0
;
; GFX12-LABEL: name: mask_hazard_cndmask_dpp3
- ; GFX12: $vgpr0 = V_CNDMASK_B16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
+ ; GFX12: $vgpr0 = V_CNDMASK_B16_fake16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
; GFX12-NEXT: S_ENDPGM 0
- $vgpr0 = V_CNDMASK_B16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
+ $vgpr0 = V_CNDMASK_B16_fake16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec
$sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
S_ENDPGM 0
...
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index 6bc92bc29ea8a6..40e3fbda47787a 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -899,104 +899,131 @@ v_bfm_b32 v5, src_scc, vcc_lo
v_bfm_b32 v255, 0xaf123456, vcc_hi
// GFX11: v_bfm_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_cndmask_b16 v5, v1, src_scc, s3
-// W32: v_cndmask_b16 v5, v1, src_scc, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00]
-// W64-ERR: :[[@LINE-2]]:32: error: invalid operand for instruction
+v_cndmask_b16 v5.l, v1.l, src_scc, s3
+// W32: v_cndmask_b16 v5.l, v1.l, src_scc, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00]
+// W64-ERR: :[[@LINE-2]]:36: error: invalid operand for instruction
-v_cndmask_b16 v5, v255, 0.5, s3
-// W32: v_cndmask_b16 v5, v255, 0.5, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x0d,0x00]
-// W64-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction
+v_cndmask_b16 v5.l, v255.l, 0.5, s3
+// W32: v_cndmask_b16 v5.l, v255.l, 0.5, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x0d,0x00]
+// W64-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction
-v_cndmask_b16 v5, s105, s105, s3
-// W32: v_cndmask_b16 v5, s105, s105, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x0c,0x00]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
-
-v_cndmask_b16 v5, vcc_hi, v2, s3
-// W32: v_cndmask_b16 v5, vcc_hi, v2, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x0e,0x00]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, s105, s105, s3
+// W32: v_cndmask_b16 v5.l, s105, s105, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, ttmp15, ttmp15, s3
-// W32: v_cndmask_b16 v5, ttmp15, ttmp15, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x0c,0x00]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, vcc_hi, v2.l, s3
+// W32: v_cndmask_b16 v5.l, vcc_hi, v2.l, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x0e,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, m0, v255, s3
-// W32: v_cndmask_b16 v5, m0, v255, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x0f,0x00]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, ttmp15, ttmp15, s3
+// W32: v_cndmask_b16 v5.l, ttmp15, ttmp15, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, exec_lo, exec_lo, s3
-// W32: v_cndmask_b16 v5, exec_lo, exec_lo, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x0c,0x00]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, m0, v255.l, s3
+// W32: v_cndmask_b16 v5.l, m0, v255.l, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x0f,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, exec_hi, exec_hi, s3
-// W32: v_cndmask_b16 v5, exec_hi, exec_hi, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x0c,0x00]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, exec_lo, exec_lo, s3
+// W32: v_cndmask_b16 v5.l, exec_lo, exec_lo, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, null, m0, s105
-// W32: v_cndmask_b16 v5, null, m0, s105 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0xa4,0x01]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, exec_hi, exec_hi, s3
+// W32: v_cndmask_b16 v5.l, exec_hi, exec_hi, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, -1, -|vcc_lo|, vcc_lo
-// W32: v_cndmask_b16 v5, -1, -|vcc_lo|, vcc_lo ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa8,0x41]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, null, m0, s105
+// W32: v_cndmask_b16 v5.l, null, m0, s105 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0xa4,0x01]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, 0.5, -1, vcc_hi
-// W32: v_cndmask_b16 v5, 0.5, -1, vcc_hi ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xad,0x01]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, -1, -|vcc_lo|, vcc_lo
+// W32: v_cndmask_b16 v5.l, -1, -|vcc_lo|, vcc_lo ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa8,0x41]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, -|src_scc|, null, ttmp15
-// W32: v_cndmask_b16 v5, -|src_scc|, null, ttmp15 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xec,0x21]
+v_cndmask_b16 v5.l, 0.5, -1, vcc_hi
+// W32: v_cndmask_b16 v5.l, 0.5, -1, vcc_hi ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xad,0x01]
// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, v1, src_scc, s[6:7]
-// W64: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
-// W32-ERR: :[[@LINE-2]]:32: error: invalid operand for instruction
+v_cndmask_b16 v5.l, -|src_scc|, null, ttmp15
+// W32: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp15 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xec,0x21]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
-v_cndmask_b16 v5, v255, 0.5, s[6:7]
-// W64: v_cndmask_b16 v5, v255, 0.5, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00]
-// W32-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction
+v_cndmask_b16 v5.l, v1.l, src_scc, s[6:7]
+// W64: v_cndmask_b16 v5.l, v1.l, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
+// W32-ERR: :[[@LINE-2]]:36: error: invalid operand for instruction
-v_cndmask_b16 v5, s105, s105, s[6:7]
-// W64: v_cndmask_b16 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, v255.l, 0.5, s[6:7]
+// W64: v_cndmask_b16 v5.l, v255.l, 0.5, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00]
+// W32-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction
-v_cndmask_b16 v5, vcc_hi, v2, s[6:7]
-// W64: v_cndmask_b16 v5, vcc_hi, v2, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, s105, s105, s[6:7]
+// W64: v_cndmask_b16 v5.l, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7]
-// W64: v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, vcc_hi, v2.l, s[6:7]
+// W64: v_cndmask_b16 v5.l, vcc_hi, v2.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, m0, v255, s[6:7]
-// W64: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, ttmp15, ttmp15, s[6:7]
+// W64: v_cndmask_b16 v5.l, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7]
-// W64: v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, m0, v255.l, s[6:7]
+// W64: v_cndmask_b16 v5.l, m0, v255.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7]
-// W64: v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, exec_lo, exec_lo, s[6:7]
+// W64: v_cndmask_b16 v5.l, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, null, m0, s[6:7]
-// W64: v_cndmask_b16 v5, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, exec_hi, exec_hi, s[6:7]
+// W64: v_cndmask_b16 v5.l, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105]
-// W64: v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, null, m0, s[6:7]
+// W64: v_cndmask_b16 v5.l, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cndmask_b16 v5.l, -1, -|vcc_lo|, s[104:105]
+// W64: v_cndmask_b16 v5.l, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cndmask_b16 v5.l, 0.5, -1, vcc
+// W64: v_cndmask_b16 v5.l, 0.5, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cndmask_b16 v5.l, -|src_scc|, null, ttmp[14:15]
+// W64: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null
+// GFX11: v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_cndmask_b16 v5.l, 0x3800, -1, vcc_lo
+// W32: v_cndmask_b16 v5.l, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cndmask_b16 v5.l, 0x3800, -1, vcc
+// W64: v_cndmask_b16 v5.l, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cndmask_b16 v5.l, v255.h, 0.5, s3
+// W32: v_cndmask_b16 v5.l, v255.h, 0.5, s3 ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xe1,0x0d,0x00]
+// W64-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction
+
+v_cndmask_b16 v5.l, m0, v255.h, s3
+// W32: v_cndmask_b16 v5.l, m0, v255.h, s3 ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x0f,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, 0.5, -1, vcc
-// W64: v_cndmask_b16 v5, 0.5, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, v255.h, 0.5, s[6:7]
+// W64: v_cndmask_b16 v5.l, v255.h, 0.5, s[6:7] ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xe1,0x19,0x00]
+// W32-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction
-v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15]
-// W64: v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+v_cndmask_b16 v5.l, m0, v255.h, s[6:7]
+// W64: v_cndmask_b16 v5.l, m0, v255.h, s[6:7] ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null
-// GFX11: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null
+// GFX11: v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
v_cubeid_f32 v5, v1, v2, s3
// GFX11: v_cubeid_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index 5fa1334aa6e956..2bff644605ff60 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -765,112 +765,139 @@ v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0]
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3]
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3]
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:1
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:15
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:1
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:15
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_ror:1
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s105 row_ror:15
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s105 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 row_ror:15
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
-// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13]
-// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0]
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0]
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3]
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3]
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
-// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13]
-// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30]
+v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30]
+
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xae,0x41,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xae,0x41,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30]
v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
// GFX11: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
index 2fc02061c59deb..2f9b5efca9e172 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
@@ -424,44 +424,71 @@ v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x1d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
+v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
+
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xae,0x41,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xae,0x41,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
index 3e7b7d28c2e971..cd4ed2b9458e6c 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
@@ -857,104 +857,131 @@ v_bfm_b32 v5, src_scc, vcc_lo
v_bfm_b32 v255, 0xaf123456, vcc_hi
// GFX12: v_bfm_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_cndmask_b16 v5, v1, src_scc, s3
-// W32: v_cndmask_b16 v5, v1, src_scc, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00]
-// W64-ERR: :[[@LINE-2]]:32: error: invalid operand for instruction
+v_cndmask_b16 v5.l, v1.l, src_scc, s3
+// W32: v_cndmask_b16 v5.l, v1.l, src_scc, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00]
+// W64-ERR: :[[@LINE-2]]:36: error: invalid operand for instruction
-v_cndmask_b16 v5, v255, 0.5, s3
-// W32: v_cndmask_b16 v5, v255, 0.5, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x0d,0x00]
-// W64-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction
+v_cndmask_b16 v5.l, v255.l, 0.5, s3
+// W32: v_cndmask_b16 v5.l, v255.l, 0.5, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x0d,0x00]
+// W64-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction
-v_cndmask_b16 v5, s105, s105, s3
-// W32: v_cndmask_b16 v5, s105, s105, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x0c,0x00]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
-
-v_cndmask_b16 v5, vcc_hi, v2, s3
-// W32: v_cndmask_b16 v5, vcc_hi, v2, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x0e,0x00]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, s105, s105, s3
+// W32: v_cndmask_b16 v5.l, s105, s105, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, ttmp15, ttmp15, s3
-// W32: v_cndmask_b16 v5, ttmp15, ttmp15, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x0c,0x00]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, vcc_hi, v2.l, s3
+// W32: v_cndmask_b16 v5.l, vcc_hi, v2.l, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x0e,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, m0, v255, s3
-// W32: v_cndmask_b16 v5, m0, v255, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x0f,0x00]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, ttmp15, ttmp15, s3
+// W32: v_cndmask_b16 v5.l, ttmp15, ttmp15, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, exec_lo, exec_lo, s3
-// W32: v_cndmask_b16 v5, exec_lo, exec_lo, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x0c,0x00]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, m0, v255.l, s3
+// W32: v_cndmask_b16 v5.l, m0, v255.l, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x0f,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, exec_hi, exec_hi, s3
-// W32: v_cndmask_b16 v5, exec_hi, exec_hi, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x0c,0x00]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, exec_lo, exec_lo, s3
+// W32: v_cndmask_b16 v5.l, exec_lo, exec_lo, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, null, m0, s105
-// W32: v_cndmask_b16 v5, null, m0, s105 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0xa4,0x01]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, exec_hi, exec_hi, s3
+// W32: v_cndmask_b16 v5.l, exec_hi, exec_hi, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, -1, -|vcc_lo|, vcc_lo
-// W32: v_cndmask_b16 v5, -1, -|vcc_lo|, vcc_lo ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa8,0x41]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, null, m0, s105
+// W32: v_cndmask_b16 v5.l, null, m0, s105 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0xa4,0x01]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, 0.5, -1, vcc_hi
-// W32: v_cndmask_b16 v5, 0.5, -1, vcc_hi ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xad,0x01]
-// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, -1, -|vcc_lo|, vcc_lo
+// W32: v_cndmask_b16 v5.l, -1, -|vcc_lo|, vcc_lo ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa8,0x41]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, -|src_scc|, null, ttmp15
-// W32: v_cndmask_b16 v5, -|src_scc|, null, ttmp15 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xec,0x21]
+v_cndmask_b16 v5.l, 0.5, -1, vcc_hi
+// W32: v_cndmask_b16 v5.l, 0.5, -1, vcc_hi ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xad,0x01]
// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, v1, src_scc, s[6:7]
-// W64: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
-// W32-ERR: :[[@LINE-2]]:32: error: invalid operand for instruction
+v_cndmask_b16 v5.l, -|src_scc|, null, ttmp15
+// W32: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp15 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xec,0x21]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
-v_cndmask_b16 v5, v255, 0.5, s[6:7]
-// W64: v_cndmask_b16 v5, v255, 0.5, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00]
-// W32-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction
+v_cndmask_b16 v5.l, v1.l, src_scc, s[6:7]
+// W64: v_cndmask_b16 v5.l, v1.l, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
+// W32-ERR: :[[@LINE-2]]:36: error: invalid operand for instruction
-v_cndmask_b16 v5, s105, s105, s[6:7]
-// W64: v_cndmask_b16 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, v255.l, 0.5, s[6:7]
+// W64: v_cndmask_b16 v5.l, v255.l, 0.5, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00]
+// W32-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction
-v_cndmask_b16 v5, vcc_hi, v2, s[6:7]
-// W64: v_cndmask_b16 v5, vcc_hi, v2, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, s105, s105, s[6:7]
+// W64: v_cndmask_b16 v5.l, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7]
-// W64: v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, vcc_hi, v2.l, s[6:7]
+// W64: v_cndmask_b16 v5.l, vcc_hi, v2.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, m0, v255, s[6:7]
-// W64: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, ttmp15, ttmp15, s[6:7]
+// W64: v_cndmask_b16 v5.l, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7]
-// W64: v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, m0, v255.l, s[6:7]
+// W64: v_cndmask_b16 v5.l, m0, v255.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7]
-// W64: v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, exec_lo, exec_lo, s[6:7]
+// W64: v_cndmask_b16 v5.l, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, null, m0, s[6:7]
-// W64: v_cndmask_b16 v5, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, exec_hi, exec_hi, s[6:7]
+// W64: v_cndmask_b16 v5.l, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105]
-// W64: v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, null, m0, s[6:7]
+// W64: v_cndmask_b16 v5.l, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cndmask_b16 v5.l, -1, -|vcc_lo|, s[104:105]
+// W64: v_cndmask_b16 v5.l, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cndmask_b16 v5.l, 0.5, -1, vcc
+// W64: v_cndmask_b16 v5.l, 0.5, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cndmask_b16 v5.l, -|src_scc|, null, ttmp[14:15]
+// W64: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null
+// GFX12: v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
-v_cndmask_b16 v5, 0.5, -1, vcc
-// W64: v_cndmask_b16 v5, 0.5, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01]
-// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction
+v_cndmask_b16 v5.l, v255.h, 0.5, s3
+// W32: v_cndmask_b16 v5.l, v255.h, 0.5, s3 ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xe1,0x0d,0x00]
+// W64-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction
+
+v_cndmask_b16 v5.l, m0, v255.h, s3
+// W32: v_cndmask_b16 v5.l, m0, v255.h, s3 ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x0f,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cndmask_b16 v5.l, v255.h, 0.5, s[6:7]
+// W64: v_cndmask_b16 v5.l, v255.h, 0.5, s[6:7] ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xe1,0x19,0x00]
+// W32-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction
+
+v_cndmask_b16 v5.l, m0, v255.h, s[6:7]
+// W64: v_cndmask_b16 v5.l, m0, v255.h, s[6:7] ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cndmask_b16 v5.l, 0x3800, -1, vcc_lo
+// W32: v_cndmask_b16 v5.l, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15]
-// W64: v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+v_cndmask_b16 v5.l, 0x3800, -1, vcc
+// W64: v_cndmask_b16 v5.l, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
-v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null
-// GFX12: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null
+// GFX12: v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
v_cubeid_f32 v5, v1, v2, s3
// GFX12: v_cubeid_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
index aa804cc302bf03..78ce7451c1ba73 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
@@ -869,128 +869,147 @@ v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0]
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3]
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3]
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, s2, s3 row_mirror
-// W32: v_cndmask_b16_e64_dpp v5, v1, s2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0c,0x00,0x01,0x40,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 row_mirror
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0c,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, 10, s3 row_mirror
-// W32: v_cndmask_b16_e64_dpp v5, v1, 10, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x0d,0x00,0x01,0x40,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 row_mirror
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x0d,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:1
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:15
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:1
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:15
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_ror:1
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s105 row_ror:15
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s105 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 row_ror:15
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1
-// W32: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
-// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W32: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13]
-// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0]
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0]
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3]
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3]
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, s2, s[6:7] row_half_mirror
-// W64: v_cndmask_b16_e64_dpp v5, v1, s2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x18,0x00,0x01,0x41,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s[6:7] row_half_mirror
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x18,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, 10, s[6:7] row_half_mirror
-// W64: v_cndmask_b16_e64_dpp v5, v1, 10, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x19,0x00,0x01,0x41,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s[6:7] row_half_mirror
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x19,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1
-// W64: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
-// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// W64: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13]
-// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30]
+v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30]
+
+v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xae,0x41,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30]
v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
index e93a65ec92e73a..b41f92b8893687 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
@@ -516,56 +516,75 @@ v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x1d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cndmask_b16_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, 10, s3 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cndmask_b16_e64_dpp v5, v1, 10, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x14,0x0d,0x00,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x14,0x0d,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W32: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05]
-// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v5, -v1, |s2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1
-// W64: v_cndmask_b16_e64_dpp v5, -v1, |s2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xe8,0x21,0x01,0x77,0x39,0x05]
-// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |s2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |s2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xe8,0x21,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:42: error: invalid operand for instruction
-v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
+v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
+
+v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xae,0x41,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
index adcca58776100e..05174e31289199 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
@@ -1054,55 +1054,100 @@
# GFX11: v_bfm_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00
-# W32: v_cndmask_b16 v5, v1, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
-# W64: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, v1.l, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, v1, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, v1.l, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00
-# W32: v_cndmask_b16 v5, v255, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
-# W64: v_cndmask_b16 v5, v255, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, v255.l, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, v255.l, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00
-# W32: v_cndmask_b16 v5, s105, s105, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
-# W64: v_cndmask_b16 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, s105, s105, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, s105, s105, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00
-# W32: v_cndmask_b16 v5, vcc_hi, v2, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
-# W64: v_cndmask_b16 v5, vcc_hi, v2, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, vcc_hi, v2.l, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, vcc_hi, v2, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, vcc_hi, v2.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, vcc_hi, v2, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00
-# W32: v_cndmask_b16 v5, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
-# W64: v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00
-# W32: v_cndmask_b16 v5, m0, v255, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
-# W64: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, m0, v255.l, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, m0, v255, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, m0, v255.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00
-# W32: v_cndmask_b16 v5, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
-# W64: v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00
-# W32: v_cndmask_b16 v5, exec_hi, exec_hi, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
-# W64: v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, exec_hi, exec_hi, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, exec_hi, exec_hi, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00
-# W32: v_cndmask_b16 v5, null, m0, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
-# W64: v_cndmask_b16 v5, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, null, m0, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, null, m0, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41
-# W32: v_cndmask_b16 v5, -1, -|vcc_lo|, s104 ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
-# W64: v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
+# W32-REAL16: v_cndmask_b16 v5.l, -1, -|vcc_lo|, s104 ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
+# W32-FAKE16: v_cndmask_b16 v5, -1, -|vcc_lo|, s104 ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
+# W64-REAL16: v_cndmask_b16 v5.l, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
+# W64-FAKE16: v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01
-# W32: v_cndmask_b16 v5, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
-# W64: v_cndmask_b16 v5, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21
-# W32: v_cndmask_b16 v5, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
-# W64: v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+# W32-REAL16: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+# W32-FAKE16: v_cndmask_b16 v5, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+# W64-REAL16: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+# W64-FAKE16: v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
-# GFX11: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+0x05,0x08,0x5d,0xd6,0xff,0xe1,0x19,0x00
+# W32-REAL16: v_cndmask_b16 v5.l, v255.h, 0x3800, s6 ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, v255.h, 0x3800, s[6:7] ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+
+0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00
+# W32-REAL16: v_cndmask_b16 v5.l, m0, v255.h, s6 ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, m0, v255, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, m0, v255.h, s[6:7] ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+
+0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00
# GFX11: v_cubeid_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
index 2964360a77fd2d..c9ef3c714213d1 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
@@ -738,65 +738,118 @@
# GFX11: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13]
0x05,0x02,0x5d,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x09,0x30
-# GFX11: v_cndmask_b16_e64_dpp v5, -v1, |v2|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x09,0x30]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x09,0x30]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x09,0x30]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x09,0x30]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x09,0x30]
0x05,0x01,0x5d,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x09,0x30
-# GFX11: v_cndmask_b16_e64_dpp v5, |v1|, -v2, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x09,0x30]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x09,0x30]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x09,0x30]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x09,0x30]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x09,0x30]
0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30
-# GFX11: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30]
+
+0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+
+0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13]
+
+0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
# GFX11: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
index 7a81ba23afa35d..1e74b5aec0cf31 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
@@ -396,29 +396,64 @@
# GFX11: v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05]
0x05,0x02,0x5d,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x00,0x00,0x00
-# GFX11: v_cndmask_b16_e64_dpp v5, -v1, |v2|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x00,0x00,0x00]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x00,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x00,0x00,0x00]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x00,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x00,0x00,0x00]
0x05,0x01,0x5d,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x00,0x00,0x00
-# GFX11: v_cndmask_b16_e64_dpp v5, |v1|, -v2, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x00,0x00,0x00]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x00,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x00,0x00,0x00]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x00,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x00,0x00,0x00]
0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00
-# GFX11: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00]
+
+0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+
+0x05,0x12,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x12,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x12,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
+
+0xff,0x43,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
# GFX11: v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
index 633d3a48634fa1..4108fd9c8be620 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
@@ -1018,55 +1018,100 @@
# GFX12: v_bfm_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00
-# W32: v_cndmask_b16 v5, v1, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
-# W64: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, v1.l, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, v1, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, v1.l, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00
-# W32: v_cndmask_b16 v5, v255, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
-# W64: v_cndmask_b16 v5, v255, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, v255.l, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, v255.l, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00
-# W32: v_cndmask_b16 v5, s105, s105, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
-# W64: v_cndmask_b16 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, s105, s105, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, s105, s105, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00
-# W32: v_cndmask_b16 v5, vcc_hi, v2, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
-# W64: v_cndmask_b16 v5, vcc_hi, v2, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, vcc_hi, v2.l, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, vcc_hi, v2, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, vcc_hi, v2.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, vcc_hi, v2, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00
-# W32: v_cndmask_b16 v5, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
-# W64: v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00
-# W32: v_cndmask_b16 v5, m0, v255, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
-# W64: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, m0, v255.l, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, m0, v255, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, m0, v255.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00
-# W32: v_cndmask_b16 v5, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
-# W64: v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00
-# W32: v_cndmask_b16 v5, exec_hi, exec_hi, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
-# W64: v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, exec_hi, exec_hi, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, exec_hi, exec_hi, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00
-# W32: v_cndmask_b16 v5, null, m0, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
-# W64: v_cndmask_b16 v5, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, null, m0, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, null, m0, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41
-# W32: v_cndmask_b16 v5, -1, -|vcc_lo|, s104 ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
-# W64: v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
+# W32-REAL16: v_cndmask_b16 v5.l, -1, -|vcc_lo|, s104 ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
+# W32-FAKE16: v_cndmask_b16 v5, -1, -|vcc_lo|, s104 ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
+# W64-REAL16: v_cndmask_b16 v5.l, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
+# W64-FAKE16: v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01
-# W32: v_cndmask_b16 v5, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
-# W64: v_cndmask_b16 v5, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
+# W32-REAL16: v_cndmask_b16 v5.l, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00]
0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21
-# W32: v_cndmask_b16 v5, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
-# W64: v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+# W32-REAL16: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+# W32-FAKE16: v_cndmask_b16 v5, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+# W64-REAL16: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+# W64-FAKE16: v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
-# GFX12: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+0x05,0x08,0x5d,0xd6,0xff,0xe1,0x19,0x00
+# W32-REAL16: v_cndmask_b16 v5.l, v255.h, 0x3800, s6 ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, v255.h, 0x3800, s[6:7] ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00]
+
+0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00
+# W32-REAL16: v_cndmask_b16 v5.l, m0, v255.h, s6 ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W32-FAKE16: v_cndmask_b16 v5, m0, v255, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W64-REAL16: v_cndmask_b16 v5.l, m0, v255.h, s[6:7] ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+# W64-FAKE16: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+
+0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00
# GFX12: v_cubeid_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
index 7e30a4a2096b19..0be540da8287be 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
@@ -789,59 +789,106 @@
# GFX12: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01]
0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13]
0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30
-# GFX12: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30]
+
+0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+
+0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13]
+
+0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
index 2aaba2a17fae6d..343a71abb27d06 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
@@ -447,23 +447,52 @@
# GFX12: v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05
-# W32: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05]
-# W64: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05]
0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00
-# GFX12: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00]
+
+0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+
+0x05,0x12,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x12,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x12,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
+
+0xff,0x43,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
More information about the llvm-commits
mailing list