[llvm] c6f753b - [AMDGPU][True16][MC] true16 for v_pack_b32_f16 (#119630)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 18 10:28:45 PST 2024
Author: Brox Chen
Date: 2024-12-18T13:28:42-05:00
New Revision: c6f753b9a0910cdbb6bee77af1e5a54cf2e168fa
URL: https://github.com/llvm/llvm-project/commit/c6f753b9a0910cdbb6bee77af1e5a54cf2e168fa
DIFF: https://github.com/llvm/llvm-project/commit/c6f753b9a0910cdbb6bee77af1e5a54cf2e168fa.diff
LOG: [AMDGPU][True16][MC] true16 for v_pack_b32_f16 (#119630)
Support true16 format for v_pack_b32_f16 in MC.
Since we are replacing v_alignbit_b32 to
`v_pack_b32_f16_t16/v_pack_b32_f16_fake16` in Post-GFX11, have to update
the CodeGen pattern for `v_pack_b32_f16_fake16 `to get CodeGen test
passing. There is no pattern modified/created, but just replacing the
`v_pack_b32_f16` with fake16 format.
Some of the true16 CodeGen test are impacted since `v_pack_b32_f16`
selection are removed in Post-GFX11 while `v_pack_b32_f16_t16` are not
yet supported. The CodeGen patch for `v_pack_b32_f16_t16` will be done
is the following patch.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/VOP3Instructions.td
llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
llvm/test/CodeGen/AMDGPU/sitofp.f16.ll
llvm/test/CodeGen/AMDGPU/uitofp.f16.ll
llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 0ecd6346707e25..b1f93a447a7b82 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3377,15 +3377,22 @@ def : GCNPat <
} // end foreach Ty
+} // End SubtargetPredicate = HasVOP3PInsts
let AddedComplexity = 5 in {
-def : GCNPat <
+class PackB32Pat<Instruction inst> : GCNPat <
(v2f16 (is_canonicalized_2<build_vector> (f16 (VOP3Mods (f16 VGPR_32:$src0), i32:$src0_mods)),
(f16 (VOP3Mods (f16 VGPR_32:$src1), i32:$src1_mods)))),
- (V_PACK_B32_F16_e64 $src0_mods, VGPR_32:$src0, $src1_mods, VGPR_32:$src1)
+ (inst $src0_mods, VGPR_32:$src0, $src1_mods, VGPR_32:$src1)
>;
}
-} // End SubtargetPredicate = HasVOP3PInsts
+let SubtargetPredicate = isGFX9Plus in {
+let True16Predicate = NotHasTrue16BitInsts in
+ def : PackB32Pat<V_PACK_B32_F16_e64>;
+
+let True16Predicate = UseFakeTrue16Insts in
+ def : PackB32Pat<V_PACK_B32_F16_fake16_e64>;
+} // End SubtargetPredicate = isGFX9Plus
// With multiple uses of the shift, this will duplicate the shift and
// increase register pressure.
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 4c8b5a6bea3b2c..34f90b33bc4ba4 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -646,7 +646,7 @@ defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32
defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
-defm V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
+defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>;
let isReMaterializable = 1 in {
defm V_SUB_I32 : VOP3Inst <"v_sub_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
@@ -1754,7 +1754,7 @@ defm V_MIN_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30
defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">;
defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">;
defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">;
-defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
+defm V_PACK_B32_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x311, "v_pack_b32_f16">;
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x325, "V_SUB_I32", "v_sub_nc_i32">;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
index 6b57f20f25e2ce..84a3a3e88d2383 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
@@ -163,9 +163,11 @@ define amdgpu_kernel void @ceil_v2f16(
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_ceil_f16_e32 v0.h, v1.l
; GFX11-NEXT: v_mov_b16_e32 v1.l, v0.l
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
index 4f206b82fdd601..9909cfd32b11f7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
@@ -164,9 +164,11 @@ define amdgpu_kernel void @floor_v2f16(
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_floor_f16_e32 v0.h, v1.l
; GFX11-NEXT: v_mov_b16_e32 v1.l, v0.l
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
index 72e86f1f6f9992..53c26cadbf75a6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
@@ -480,11 +480,9 @@ define <2 x half> @test_ldexp_v2f16_v2i32(<2 x half> %a, <2 x i32> %b) {
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i32:
@@ -610,12 +608,11 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3.l, v2.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i16:
@@ -744,12 +741,11 @@ define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v5.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v2, v4, s0, 0x7fff
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v2.l
-; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v3, v0
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v0, v3, v0, 0x5040100
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
@@ -900,7 +896,7 @@ define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
+; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i16:
@@ -1043,24 +1039,21 @@ define <4 x half> @test_ldexp_v4f16_v4i32(<4 x half> %a, <4 x i32> %b) {
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v5, v5, s0, 0x7fff
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v4, v4, s0, 0x7fff
-; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v6.l, v3.l
-; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v7.l, v5.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v4.l
-; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v7.l, v5.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
-; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v3, v1
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i32:
@@ -1257,8 +1250,8 @@ define <4 x half> @test_ldexp_v4f16_v4i16(<4 x half> %a, <4 x i16> %b) {
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
-; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v3, v1
+; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
+; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v1, v1, v3, 0x5040100
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i16:
diff --git a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll
index cda4c085cd25a6..245df6684384c6 100644
--- a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll
@@ -240,9 +240,11 @@ define amdgpu_kernel void @sitofp_v2i16_to_v2f16(
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cvt_f16_i16_e32 v0.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-TRUE16-NEXT: s_endpgm
;
@@ -344,8 +346,9 @@ define amdgpu_kernel void @sitofp_v2i32_to_v2f16(
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-TRUE16-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll
index 37d1116e9eccb6..bc1b102d33de16 100644
--- a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll
@@ -240,9 +240,11 @@ define amdgpu_kernel void @uitofp_v2i16_to_v2f16(
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cvt_f16_u16_e32 v0.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-TRUE16-NEXT: s_endpgm
;
@@ -344,8 +346,9 @@ define amdgpu_kernel void @uitofp_v2i32_to_v2f16(
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-TRUE16-NEXT: s_endpgm
;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index c392ff85deef8b..d46f010a2dafbd 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -5435,11 +5435,11 @@ v_or_b16 v5.l, v255.l, v255.h
v_or_b16 v255.h, 0xfe0b, vcc_hi
// GFX11: [0xff,0x40,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
-v_pack_b32_f16 v5, v1, v2
-// GFX11: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
+v_pack_b32_f16 v5, v1.l, v2.l
+// GFX11: v_pack_b32_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
-v_pack_b32_f16 v5, v255, v255
-// GFX11: v_pack_b32_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
+v_pack_b32_f16 v5, v255.l, v255.l
+// GFX11: v_pack_b32_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
v_pack_b32_f16 v5, s1, s2
// GFX11: v_pack_b32_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00]
@@ -5471,7 +5471,7 @@ v_pack_b32_f16 v5, null, exec_lo
v_pack_b32_f16 v5, -1, exec_hi
// GFX11: v_pack_b32_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00]
-v_pack_b32_f16 v5, 0.5, -m0 op_sel:[0,0,0]
+v_pack_b32_f16 v5, 0.5, -m0
// GFX11: v_pack_b32_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40]
v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
@@ -5480,6 +5480,18 @@ v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
// GFX11: v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+v_pack_b32_f16 v5, v1.h, v2.l
+// GFX11: v_pack_b32_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+
+v_pack_b32_f16 v5, v255.l, v255.h
+// GFX11: v_pack_b32_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+
+v_pack_b32_f16 v5, -src_scc, |vcc_lo|
+// GFX11: v_pack_b32_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x11,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi|
+// GFX11: v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| ; encoding: [0xff,0x03,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
v_perm_b32 v5, v1, v2, s3
// GFX11: v_perm_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index 38369c8dcc4d43..36d959faa99840 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -4022,47 +4022,53 @@ v_or_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_c
v_or_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x40,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror
-// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror
+// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror
-// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror
+// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1
-// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1
+// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15
-// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15
+// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1
-// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1
+// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15
-// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15
+// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1
-// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1
+// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15
-// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15
+// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-v_pack_b32_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_pack_b32_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
-v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
// GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -4970,11 +4976,11 @@ v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x
v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
// GFX11: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
-v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX11: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
// GFX11: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
index b2fc2c5908498e..479bd19eaac896 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
@@ -2650,17 +2650,23 @@ v_or_b16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
v_or_b16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x40,0x63,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-v_pack_b32_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: v_pack_b32_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x11,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x11,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x11,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0]
+// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x11,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x11,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x11,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -3270,11 +3276,11 @@ v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1
// GFX11: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
-v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX11: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4]
// GFX11: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
index 482b58d96c8f10..62bebd00ee51f8 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
@@ -5156,11 +5156,11 @@ v_or_b16 v5.l, v255.l, v255.h
v_or_b16 v255.h, 0xfe0b, vcc_hi
// GFX12: v_or_b16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
-v_pack_b32_f16 v5, v1, v2
-// GFX12: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
+v_pack_b32_f16 v5, v1.l, v2.l
+// GFX12: v_pack_b32_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
-v_pack_b32_f16 v5, v255, v255
-// GFX12: v_pack_b32_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
+v_pack_b32_f16 v5, v255.l, v255.l
+// GFX12: v_pack_b32_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
v_pack_b32_f16 v5, s1, s2
// GFX12: v_pack_b32_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00]
@@ -5192,7 +5192,7 @@ v_pack_b32_f16 v5, null, exec_lo
v_pack_b32_f16 v5, -1, exec_hi
// GFX12: v_pack_b32_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00]
-v_pack_b32_f16 v5, 0.5, -m0 op_sel:[0,0,0]
+v_pack_b32_f16 v5, 0.5, -m0
// GFX12: v_pack_b32_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40]
v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
@@ -5201,6 +5201,12 @@ v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
// GFX12: v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+v_pack_b32_f16 v5, v1.h, v2.l
+// GFX12: v_pack_b32_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+
+v_pack_b32_f16 v5, v255.l, v255.h
+// GFX12: v_pack_b32_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+
v_perm_b32 v5, v1, v2, s3
// GFX12: v_perm_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
index c66c102e4a0118..e7b12ec2deff5d 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
@@ -4355,47 +4355,53 @@ v_or_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_c
v_or_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: v_or_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
-// GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
-// GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror
-// GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror
+// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror
-// GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror
+// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1
-// GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1
+// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15
-// GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15
+// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1
-// GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1
+// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15
-// GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15
+// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1
-// GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1
+// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15
-// GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15
+// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX12: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-v_pack_b32_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX12: v_pack_b32_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
-v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -5404,11 +5410,11 @@ v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x
v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
// GFX12: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
-v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX12: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
index 915bc9f9d8f932..b879e59b3608b4 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
@@ -2964,17 +2964,23 @@ v_or_b16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
v_or_b16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: v_or_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x63,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-v_pack_b32_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX12: v_pack_b32_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x11,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x11,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x11,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x11,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x11,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x11,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -3673,11 +3679,11 @@ v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1
// GFX12: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
-v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4]
// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
index 39115203e47f50..857da7cd58cfb6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
@@ -6439,10 +6439,16 @@
# W64-FAKE16: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00
-# GFX11: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_pack_b32_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_pack_b32_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00
-# GFX11: v_pack_b32_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_pack_b32_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_pack_b32_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_pack_b32_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_pack_b32_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00
# GFX11: v_pack_b32_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00]
@@ -6483,6 +6489,30 @@
0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
# GFX11: v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00
+# W32-REAL16: v_pack_b32_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_pack_b32_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+
+0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00
+# W32-REAL16: v_pack_b32_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_pack_b32_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_pack_b32_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_pack_b32_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+
+0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00
+# W32-REAL16: v_pack_b32_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_pack_b32_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+
+0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00
+# W32-REAL16: v_pack_b32_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_pack_b32_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_pack_b32_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_pack_b32_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+
0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00
# GFX11: v_perm_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
index 693d56ff3a8907..93992cba437716 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
@@ -5619,46 +5619,100 @@
# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# GFX11: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# GFX11: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
index e6d57af1274397..a339d553fb1b66 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
@@ -3747,16 +3747,40 @@
# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# GFX11: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# GFX11: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
index ad116022012df8..d31e96af43d895 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
@@ -6189,10 +6189,16 @@
# W64-FAKE16: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00
-# GFX12: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_pack_b32_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_pack_b32_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00
-# GFX12: v_pack_b32_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_pack_b32_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_pack_b32_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_pack_b32_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_pack_b32_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00
# GFX12: v_pack_b32_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00]
@@ -6233,6 +6239,18 @@
0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
# GFX12: v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00
+# W32-REAL16: v_pack_b32_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_pack_b32_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x11,0xd7,0x01,0x05,0x02,0x00]
+
+0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00
+# W32-REAL16: v_pack_b32_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_pack_b32_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_pack_b32_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_pack_b32_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x11,0xd7,0xff,0xff,0x03,0x00]
+
0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00
# GFX12: v_perm_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
index 354bdfb0c24cc3..53c1f2eb0cf125 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
@@ -6126,46 +6126,100 @@
# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# GFX12: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# GFX12: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
index 4b5f7cb2e05261..a77b90c4d31bcd 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
@@ -4191,16 +4191,40 @@
# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# GFX12: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# GFX12: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
More information about the llvm-commits
mailing list