[llvm] f4319bc - [AMDGPU][MC][GFX11][NFC] Update asm tests for VOP3P instructions
Dmitry Preobrazhensky via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 30 07:05:43 PDT 2022
Author: Dmitry Preobrazhensky
Date: 2022-08-30T17:04:09+03:00
New Revision: f4319bcb0e8b4fc2c43bbb585417d7e95312eab0
URL: https://github.com/llvm/llvm-project/commit/f4319bcb0e8b4fc2c43bbb585417d7e95312eab0
DIFF: https://github.com/llvm/llvm-project/commit/f4319bcb0e8b4fc2c43bbb585417d7e95312eab0.diff
LOG: [AMDGPU][MC][GFX11][NFC] Update asm tests for VOP3P instructions
Differential Revision: https://reviews.llvm.org/D132876
Added:
llvm/test/MC/AMDGPU/gfx11_asm_vop3p_features.s
Modified:
llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
Removed:
################################################################################
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
index dc56baa77e1f..7a703dfc48fd 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
@@ -1,213 +1,1252 @@
// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1100 %s | FileCheck --check-prefix=GFX11 %s
-v_pk_fma_f16 v8, v0, s0, v1 clamp
-// GFX11: encoding: [0x08,0xc0,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+v_dot2_f32_bf16 v5, v1, v2, v3
+// GFX11: [0x05,0x40,0x1a,0xcc,0x01,0x05,0x0e,0x1c]
-v_pk_add_u16 v1, v2, v3 clamp
-// GFX11: encoding: [0x01,0xc0,0x0a,0xcc,0x02,0x07,0x02,0x18]
+v_dot2_f32_bf16 v5, v1, v2, s3
+// GFX11: [0x05,0x40,0x1a,0xcc,0x01,0x05,0x0e,0x18]
-v_pk_min_i16 v0, v1, v2 clamp
-// GFX11: encoding: [0x00,0xc0,0x08,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_bf16 v5, v255, v255, s105
+// GFX11: [0x05,0x40,0x1a,0xcc,0xff,0xff,0xa7,0x19]
-v_pk_mul_lo_u16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x01,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_bf16 v5, s1, s2, v255
+// GFX11: [0x05,0x40,0x1a,0xcc,0x01,0x04,0xfc,0x1f]
-v_pk_add_i16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x02,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_bf16 v5, s105, s105, m0
+// GFX11: [0x05,0x40,0x1a,0xcc,0x69,0xd2,0xf4,0x19]
-v_pk_sub_i16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x03,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_bf16 v5, vcc_lo, ttmp15, vcc_lo
+// GFX11: [0x05,0x40,0x1a,0xcc,0x6a,0xf6,0xa8,0x19]
-v_pk_lshlrev_b16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x04,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_bf16 v5, vcc_hi, 0xfe0b, vcc_hi
+// GFX11: [0x05,0x40,0x1a,0xcc,0x6b,0xfe,0xad,0x19,0x0b,0xfe,0x00,0x00]
-v_pk_lshrrev_b16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x05,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_bf16 v5, ttmp15, src_scc, ttmp15
+// GFX11: [0x05,0x40,0x1a,0xcc,0x7b,0xfa,0xed,0x19]
-v_pk_ashrrev_i16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x06,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_bf16 v5, m0, -1, exec_hi
+// GFX11: [0x05,0x40,0x1a,0xcc,0x7d,0x82,0xfd,0x19]
-v_pk_max_i16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x07,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_bf16 v5, exec_lo, null, exec_lo
+// GFX11: [0x05,0x40,0x1a,0xcc,0x7e,0xf8,0xf8,0x19]
-v_pk_min_i16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x08,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_bf16 v5, exec_hi, exec_lo, null
+// GFX11: [0x05,0x40,0x1a,0xcc,0x7f,0xfc,0xf0,0x19]
-v_pk_add_u16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x0a,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_bf16 v5, null, exec_hi, -1
+// GFX11: [0x05,0x40,0x1a,0xcc,0x7c,0xfe,0x04,0x1b]
-v_pk_max_u16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x0c,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_bf16 v5, -1, m0, 0xaf123456
+// GFX11: [0x05,0x40,0x1a,0xcc,0xc1,0xfa,0xfc,0x1b,0x56,0x34,0x12,0xaf]
-v_pk_min_u16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x0d,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_bf16 v5, src_scc, vcc_lo, src_scc neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX11: [0x05,0x41,0x1a,0xcc,0xfd,0xd4,0xf4,0x3b]
-v_pk_fma_f16 v0, v1, v2, v3
-// GFX11: encoding: [0x00,0x40,0x0e,0xcc,0x01,0x05,0x0e,0x1c]
+v_dot2_f32_bf16 v255, 0xfe0b, vcc_hi, 0.5 neg_lo:[0,0,0] neg_hi:[0,0,0] clamp
+// GFX11: [0xff,0xc0,0x1a,0xcc,0xff,0xd6,0xc0,0x1b,0x0b,0xfe,0x00,0x00]
-v_pk_add_f16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x0f,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_f16 v5, v1, v2, s3
+// GFX11: [0x05,0x40,0x13,0xcc,0x01,0x05,0x0e,0x18]
-v_pk_mul_f16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x10,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_f16 v5, v255, s2, s105
+// GFX11: [0x05,0x40,0x13,0xcc,0xff,0x05,0xa4,0x19]
-v_pk_min_f16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x11,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_f16 v5, s1, v255, exec_hi
+// GFX11: [0x05,0x40,0x13,0xcc,0x01,0xfe,0xff,0x19]
-v_pk_max_f16 v0, v1, v2
-// GFX11: encoding: [0x00,0x40,0x12,0xcc,0x01,0x05,0x02,0x18]
+v_dot2_f32_f16 v5, s105, s105, exec_lo
+// GFX11: [0x05,0x40,0x13,0xcc,0x69,0xd2,0xf8,0x19]
-//
-// Test op_sel/op_sel_hi
-//
+v_dot2_f32_f16 v5, vcc_lo, ttmp15, v3
+// GFX11: [0x05,0x40,0x13,0xcc,0x6a,0xf6,0x0c,0x1c]
-v_pk_add_u16 v1, v2, v3
-// GFX11: encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x18]
+v_dot2_f32_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: [0x05,0x40,0x13,0xcc,0x6b,0xfe,0xfd,0x1f,0x0b,0xfe,0x00,0x00]
-v_pk_add_u16 v1, v2, v3 op_sel:[0,0]
-// GFX11: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x18]
+v_dot2_f32_f16 v5, ttmp15, src_scc, ttmp15
+// GFX11: [0x05,0x40,0x13,0xcc,0x7b,0xfa,0xed,0x19]
-v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,1]
-// GFX11: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x18]
+v_dot2_f32_f16 v5, m0, 0.5, m0
+// GFX11: [0x05,0x40,0x13,0xcc,0x7d,0xe0,0xf5,0x19]
-v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x18]
+v_dot2_f32_f16 v5, exec_lo, -1, vcc_hi
+// GFX11: [0x05,0x40,0x13,0xcc,0x7e,0x82,0xad,0x19]
-v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0]
-// GFX11: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x00]
+v_dot2_f32_f16 v5, exec_hi, null, vcc_lo
+// GFX11: [0x05,0x40,0x13,0xcc,0x7f,0xf8,0xa8,0x19]
-v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
-// GFX11: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x00]
+v_dot2_f32_f16 v5, null, exec_lo, 0xaf123456
+// GFX11: [0x05,0x40,0x13,0xcc,0x7c,0xfc,0xfc,0x1b,0x56,0x34,0x12,0xaf]
-v_pk_add_u16 v1, v2, v3 op_sel:[1,0]
-// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] ; encoding: [0x01,0x48,0x0a,0xcc,0x02,0x07,0x02,0x18]
+v_dot2_f32_f16 v5, -1, exec_hi, src_scc
+// GFX11: [0x05,0x40,0x13,0xcc,0xc1,0xfe,0xf4,0x1b]
-v_pk_add_u16 v1, v2, v3 op_sel:[0,1]
-// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] ; encoding: [0x01,0x50,0x0a,0xcc,0x02,0x07,0x02,0x18]
+v_dot2_f32_f16 v5, 0.5, m0, 0.5 neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX11: [0x05,0x41,0x13,0xcc,0xf0,0xfa,0xc0,0x3b]
-v_pk_add_u16 v1, v2, v3 op_sel:[1,1]
-// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x58,0x0a,0xcc,0x02,0x07,0x02,0x18]
+v_dot2_f32_f16 v5, src_scc, vcc_lo, -1 neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX11: [0x05,0x42,0x13,0xcc,0xfd,0xd4,0x04,0x5b]
-v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1]
-// GFX11: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1] ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x10]
+v_dot2_f32_f16 v255, 0xfe0b, vcc_hi, null neg_lo:[0,0,0] neg_hi:[0,0,0] clamp
+// GFX11: [0xff,0xc0,0x13,0xcc,0xff,0xd6,0xf0,0x19,0x0b,0xfe,0x00,0x00]
-v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0]
-// GFX11: v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0] ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x08]
+v_dot4_i32_iu8 v5, v1, v2, s3
+// GFX11: [0x05,0x40,0x16,0xcc,0x01,0x05,0x0e,0x18]
-v_pk_add_u16 v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
-// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x58,0x0a,0xcc,0x02,0x07,0x02,0x18]
+v_dot4_i32_iu8 v5, v255, s2, s105
+// GFX11: [0x05,0x40,0x16,0xcc,0xff,0x05,0xa4,0x19]
-v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
-// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] ; encoding: [0x01,0x48,0x0a,0xcc,0x02,0x07,0x02,0x08]
+v_dot4_i32_iu8 v5, s1, v255, exec_hi
+// GFX11: [0x05,0x40,0x16,0xcc,0x01,0xfe,0xff,0x19]
-v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
-// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] ; encoding: [0x01,0x50,0x0a,0xcc,0x02,0x07,0x02,0x10]
+v_dot4_i32_iu8 v5, s105, s105, exec_lo
+// GFX11: [0x05,0x40,0x16,0xcc,0x69,0xd2,0xf8,0x19]
-v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
-// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x01,0x48,0x0a,0xcc,0x02,0x07,0x02,0x10]
+v_dot4_i32_iu8 v5, vcc_lo, ttmp15, v3
+// GFX11: [0x05,0x40,0x16,0xcc,0x6a,0xf6,0x0c,0x1c]
-v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
-// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] ; encoding: [0x01,0x50,0x0a,0xcc,0x02,0x07,0x02,0x08]
+v_dot4_i32_iu8 v5, vcc_hi, 0xaf123456, v255
+// GFX11: [0x05,0x40,0x16,0xcc,0x6b,0xfe,0xfd,0x1f,0x56,0x34,0x12,0xaf]
-//
-// Test src2 op_sel/op_sel_hi
-//
-v_pk_fma_f16 v8, v0, s0, v1
-// GFX11: encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+v_dot4_i32_iu8 v5, ttmp15, src_scc, ttmp15
+// GFX11: [0x05,0x40,0x16,0xcc,0x7b,0xfa,0xed,0x19]
-v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+v_dot4_i32_iu8 v5, m0, 0.5, m0
+// GFX11: [0x05,0x40,0x16,0xcc,0x7d,0xe0,0xf5,0x19]
-v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+v_dot4_i32_iu8 v5, exec_lo, -1, vcc_hi
+// GFX11: [0x05,0x40,0x16,0xcc,0x7e,0x82,0xad,0x19]
-v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+v_dot4_i32_iu8 v5, exec_hi, null, vcc_lo
+// GFX11: [0x05,0x40,0x16,0xcc,0x7f,0xf8,0xa8,0x19]
-v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x0e,0xcc,0x00,0x01,0x04,0x04]
+v_dot4_i32_iu8 v5, null, exec_lo, 0xaf123456
+// GFX11: [0x05,0x40,0x16,0xcc,0x7c,0xfc,0xfc,0x1b,0x56,0x34,0x12,0xaf]
-v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x0e,0xcc,0x00,0x01,0x04,0x04]
+v_dot4_i32_iu8 v5, -1, exec_hi, src_scc
+// GFX11: [0x05,0x40,0x16,0xcc,0xc1,0xfe,0xf4,0x1b]
-//
-// Test neg_lo/neg_hi
-//
+v_dot4_i32_iu8 v5, 0.5, m0, 0.5 neg_lo:[1,0,0]
+// GFX11: [0x05,0x40,0x16,0xcc,0xf0,0xfa,0xc0,0x3b]
-v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0xfc]
+v_dot4_i32_iu8 v5, src_scc, vcc_lo, -1 neg_lo:[0,1,0]
+// GFX11: [0x05,0x40,0x16,0xcc,0xfd,0xd4,0x04,0x5b]
-v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,1,1]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+v_dot4_i32_iu8 v255, 0xaf123456, vcc_hi, null neg_lo:[0,0,0]
+// GFX11: [0xff,0x40,0x16,0xcc,0xff,0xd6,0xf0,0x19,0x56,0x34,0x12,0xaf]
-v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] neg_hi:[1,1,1]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x0e,0xcc,0x00,0x01,0x04,0xfc]
+v_dot4_u32_u8 v5, v1, v2, s3
+// GFX11: [0x05,0x40,0x17,0xcc,0x01,0x05,0x0e,0x18]
-v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,0,0]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x3c]
+v_dot4_u32_u8 v5, v255, s2, s105
+// GFX11: [0x05,0x40,0x17,0xcc,0xff,0x05,0xa4,0x19]
-v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,1,0]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x5c]
+v_dot4_u32_u8 v5, s1, v255, exec_hi
+// GFX11: [0x05,0x40,0x17,0xcc,0x01,0xfe,0xff,0x19]
-v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,1]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x9c]
+v_dot4_u32_u8 v5, s105, s105, exec_lo
+// GFX11: [0x05,0x40,0x17,0xcc,0x69,0xd2,0xf8,0x19]
-v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,0,0]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+v_dot4_u32_u8 v5, vcc_lo, ttmp15, v3
+// GFX11: [0x05,0x40,0x17,0xcc,0x6a,0xf6,0x0c,0x1c]
-v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,1,0]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+v_dot4_u32_u8 v5, vcc_hi, 0xaf123456, v255
+// GFX11: [0x05,0x40,0x17,0xcc,0x6b,0xfe,0xfd,0x1f,0x56,0x34,0x12,0xaf]
-v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,0,1]
-// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+v_dot4_u32_u8 v5, ttmp15, src_scc, ttmp15
+// GFX11: [0x05,0x40,0x17,0xcc,0x7b,0xfa,0xed,0x19]
-//
-// DOT
-//
-v_dot4_i32_iu8 v3, v4, v5, v6
-// GFX11: v_dot4_i32_iu8 v3, v4, v5, v6 ; encoding: [0x03,0x40,0x16,0xcc,0x04,0x0b,0x1a,0x1c]
+v_dot4_u32_u8 v5, m0, 0.5, m0
+// GFX11: [0x05,0x40,0x17,0xcc,0x7d,0xe0,0xf5,0x19]
-v_dot4_i32_iu8 v3, v4, v5, 0xf neg_lo:[1,1]
-// GFX11: v_dot4_i32_iu8 v3, v4, v5, 15 neg_lo:[1,1,0] ; encoding: [0x03,0x40,0x16,0xcc,0x04,0x0b,0x3e,0x7a]
+v_dot4_u32_u8 v5, exec_lo, -1, vcc_hi
+// GFX11: [0x05,0x40,0x17,0xcc,0x7e,0x82,0xad,0x19]
-v_dot4_u32_u8 v3, v4, v5, v6
-// GFX11: v_dot4_u32_u8 v3, v4, v5, v6 ; encoding: [0x03,0x40,0x17,0xcc,0x04,0x0b,0x1a,0x1c]
+v_dot4_u32_u8 v5, exec_hi, null, vcc_lo
+// GFX11: [0x05,0x40,0x17,0xcc,0x7f,0xf8,0xa8,0x19]
-v_dot4_i32_iu8 v3, v4, v5, 0xf
-// GFX11: v_dot4_i32_iu8 v3, v4, v5, 15 ; encoding: [0x03,0x40,0x16,0xcc,0x04,0x0b,0x3e,0x1a]
+v_dot4_u32_u8 v5, null, exec_lo, 0xaf123456
+// GFX11: [0x05,0x40,0x17,0xcc,0x7c,0xfc,0xfc,0x1b,0x56,0x34,0x12,0xaf]
-v_dot8_i32_iu4 v3, v4, v5, 0xf neg_lo:[1,0]
-// GFX11: v_dot8_i32_iu4 v3, v4, v5, 15 neg_lo:[1,0,0] ; encoding: [0x03,0x40,0x18,0xcc,0x04,0x0b,0x3e,0x3a]
+v_dot4_u32_u8 v5, -1, exec_hi, src_scc
+// GFX11: [0x05,0x40,0x17,0xcc,0xc1,0xfe,0xf4,0x1b]
-v_dot8_i32_iu4 v3, v4, v5, v0 neg_lo:[0,0]
-// GFX11: v_dot8_i32_iu4 v3, v4, v5, v0 ; encoding: [0x03,0x40,0x18,0xcc,0x04,0x0b,0x02,0x1c]
+v_dot4_u32_u8 v5, 0.5, m0, 0.5
+// GFX11: [0x05,0x40,0x17,0xcc,0xf0,0xfa,0xc0,0x1b]
-v_dot8_u32_u4 v0, v1, v2, v3
-// GFX11: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c]
+v_dot4_u32_u8 v5, src_scc, vcc_lo, -1
+// GFX11: [0x05,0x40,0x17,0xcc,0xfd,0xd4,0x04,0x1b]
-v_dot2_f32_f16 v0, v1, v2, v3
-// GFX11: v_dot2_f32_f16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x13,0xcc,0x01,0x05,0x0e,0x1c]
+v_dot4_u32_u8 v255, 0xaf123456, vcc_hi, null
+// GFX11: [0xff,0x40,0x17,0xcc,0xff,0xd6,0xf0,0x19,0x56,0x34,0x12,0xaf]
-v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1]
-// GFX11: v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] ; encoding: [0x00,0x45,0x13,0xcc,0x01,0x05,0x0e,0x7c]
+v_dot8_i32_iu4 v5, v1, v2, s3
+// GFX11: [0x05,0x40,0x18,0xcc,0x01,0x05,0x0e,0x18]
-v_dot2_f32_bf16 v0, v1, v2, v3
-// GFX11: v_dot2_f32_bf16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x1a,0xcc,0x01,0x05,0x0e,0x1c]
+v_dot8_i32_iu4 v5, v255, s2, s105
+// GFX11: [0x05,0x40,0x18,0xcc,0xff,0x05,0xa4,0x19]
-v_dot2_f32_bf16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1]
-// GFX11: v_dot2_f32_bf16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] ; encoding: [0x00,0x45,0x1a,0xcc,0x01,0x05,0x0e,0x3c]
+v_dot8_i32_iu4 v5, s1, v255, exec_hi
+// GFX11: [0x05,0x40,0x18,0xcc,0x01,0xfe,0xff,0x19]
-//
-// FMA_MIX
-//
-v_fma_mix_f32 v0, v1, v2, v3
-// GFX11: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x20,0xcc,0x01,0x05,0x0e,0x04]
+v_dot8_i32_iu4 v5, s105, s105, exec_lo
+// GFX11: [0x05,0x40,0x18,0xcc,0x69,0xd2,0xf8,0x19]
-v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1]
-// GFX11: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x00,0x20,0x20,0xcc,0x01,0x05,0x0e,0x04]
+v_dot8_i32_iu4 v5, vcc_lo, ttmp15, v3
+// GFX11: [0x05,0x40,0x18,0xcc,0x6a,0xf6,0x0c,0x1c]
-v_fma_mixlo_f16 v0, abs(v1), -v2, abs(v3)
-// GFX11: v_fma_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0x21,0xcc,0x01,0x05,0x0e,0x44]
+v_dot8_i32_iu4 v5, vcc_hi, 0xaf123456, v255
+// GFX11: [0x05,0x40,0x18,0xcc,0x6b,0xfe,0xfd,0x1f,0x56,0x34,0x12,0xaf]
-v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,1,1] clamp
-// GFX11: v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,1,1] clamp ; encoding: [0x00,0xc0,0x22,0xcc,0x01,0x05,0x0e,0x1c]
+v_dot8_i32_iu4 v5, ttmp15, src_scc, ttmp15
+// GFX11: [0x05,0x40,0x18,0xcc,0x7b,0xfa,0xed,0x19]
+
+v_dot8_i32_iu4 v5, m0, 0.5, m0
+// GFX11: [0x05,0x40,0x18,0xcc,0x7d,0xe0,0xf5,0x19]
+
+v_dot8_i32_iu4 v5, exec_lo, -1, vcc_hi
+// GFX11: [0x05,0x40,0x18,0xcc,0x7e,0x82,0xad,0x19]
+
+v_dot8_i32_iu4 v5, exec_hi, null, vcc_lo
+// GFX11: [0x05,0x40,0x18,0xcc,0x7f,0xf8,0xa8,0x19]
+
+v_dot8_i32_iu4 v5, null, exec_lo, 0xaf123456
+// GFX11: [0x05,0x40,0x18,0xcc,0x7c,0xfc,0xfc,0x1b,0x56,0x34,0x12,0xaf]
+
+v_dot8_i32_iu4 v5, -1, exec_hi, src_scc
+// GFX11: [0x05,0x40,0x18,0xcc,0xc1,0xfe,0xf4,0x1b]
+
+v_dot8_i32_iu4 v5, 0.5, m0, 0.5 neg_lo:[1,0,0]
+// GFX11: [0x05,0x40,0x18,0xcc,0xf0,0xfa,0xc0,0x3b]
+
+v_dot8_i32_iu4 v5, src_scc, vcc_lo, -1 neg_lo:[0,1,0]
+// GFX11: [0x05,0x40,0x18,0xcc,0xfd,0xd4,0x04,0x5b]
+
+v_dot8_i32_iu4 v255, 0xaf123456, vcc_hi, null neg_lo:[0,0,0] clamp
+// GFX11: [0xff,0xc0,0x18,0xcc,0xff,0xd6,0xf0,0x19,0x56,0x34,0x12,0xaf]
+
+v_dot8_u32_u4 v5, v1, v2, s3
+// GFX11: [0x05,0x40,0x19,0xcc,0x01,0x05,0x0e,0x18]
+
+v_dot8_u32_u4 v5, v255, s2, s105
+// GFX11: [0x05,0x40,0x19,0xcc,0xff,0x05,0xa4,0x19]
+
+v_dot8_u32_u4 v5, s1, v255, exec_hi
+// GFX11: [0x05,0x40,0x19,0xcc,0x01,0xfe,0xff,0x19]
+
+v_dot8_u32_u4 v5, s105, s105, exec_lo
+// GFX11: [0x05,0x40,0x19,0xcc,0x69,0xd2,0xf8,0x19]
+
+v_dot8_u32_u4 v5, vcc_lo, ttmp15, v3
+// GFX11: [0x05,0x40,0x19,0xcc,0x6a,0xf6,0x0c,0x1c]
+
+v_dot8_u32_u4 v5, vcc_hi, 0xaf123456, v255
+// GFX11: [0x05,0x40,0x19,0xcc,0x6b,0xfe,0xfd,0x1f,0x56,0x34,0x12,0xaf]
+
+v_dot8_u32_u4 v5, ttmp15, src_scc, ttmp15
+// GFX11: [0x05,0x40,0x19,0xcc,0x7b,0xfa,0xed,0x19]
+
+v_dot8_u32_u4 v5, m0, 0.5, m0
+// GFX11: [0x05,0x40,0x19,0xcc,0x7d,0xe0,0xf5,0x19]
+
+v_dot8_u32_u4 v5, exec_lo, -1, vcc_hi
+// GFX11: [0x05,0x40,0x19,0xcc,0x7e,0x82,0xad,0x19]
+
+v_dot8_u32_u4 v5, exec_hi, null, vcc_lo
+// GFX11: [0x05,0x40,0x19,0xcc,0x7f,0xf8,0xa8,0x19]
+
+v_dot8_u32_u4 v5, null, exec_lo, 0xaf123456
+// GFX11: [0x05,0x40,0x19,0xcc,0x7c,0xfc,0xfc,0x1b,0x56,0x34,0x12,0xaf]
+
+v_dot8_u32_u4 v5, -1, exec_hi, src_scc
+// GFX11: [0x05,0x40,0x19,0xcc,0xc1,0xfe,0xf4,0x1b]
+
+v_dot8_u32_u4 v5, 0.5, m0, 0.5
+// GFX11: [0x05,0x40,0x19,0xcc,0xf0,0xfa,0xc0,0x1b]
+
+v_dot8_u32_u4 v5, src_scc, vcc_lo, -1
+// GFX11: [0x05,0x40,0x19,0xcc,0xfd,0xd4,0x04,0x1b]
+
+v_dot8_u32_u4 v255, 0xaf123456, vcc_hi, null clamp
+// GFX11: [0xff,0xc0,0x19,0xcc,0xff,0xd6,0xf0,0x19,0x56,0x34,0x12,0xaf]
+
+v_fma_mix_f32 v5, v1, v2, s3
+// GFX11: [0x05,0x00,0x20,0xcc,0x01,0x05,0x0e,0x00]
+
+v_fma_mix_f32 v5, v255, v255, s105
+// GFX11: [0x05,0x00,0x20,0xcc,0xff,0xff,0xa7,0x01]
+
+v_fma_mix_f32 v5, s1, s2, v3
+// GFX11: [0x05,0x00,0x20,0xcc,0x01,0x04,0x0c,0x04]
+
+v_fma_mix_f32 v5, s105, s105, m0
+// GFX11: [0x05,0x00,0x20,0xcc,0x69,0xd2,0xf4,0x01]
+
+v_fma_mix_f32 v5, vcc_lo, ttmp15, ttmp15
+// GFX11: [0x05,0x00,0x20,0xcc,0x6a,0xf6,0xec,0x01]
+
+v_fma_mix_f32 v5, vcc_hi, src_scc, v255
+// GFX11: [0x05,0x00,0x20,0xcc,0x6b,0xfa,0xfd,0x07]
+
+v_fma_mix_f32 v5, |ttmp15|, 0.5, -vcc_hi
+// GFX11: [0x05,0x01,0x20,0xcc,0x7b,0xe0,0xad,0x81]
+
+v_fma_mix_f32 v5, -m0, -1, |vcc_lo|
+// GFX11: [0x05,0x04,0x20,0xcc,0x7d,0x82,0xa9,0x21]
+
+v_fma_mix_f32 v5, -|exec_lo|, null, -|src_scc|
+// GFX11: [0x05,0x05,0x20,0xcc,0x7e,0xf8,0xf4,0xa3]
+
+v_fma_mix_f32 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| op_sel:[1,1,1] op_sel_hi:[1,1,1]
+// GFX11: [0x05,0x7f,0x20,0xcc,0x7f,0xfc,0xf8,0xf9]
+
+v_fma_mix_f32 v5, null, exec_hi, 0.5 op_sel:[0,0,0] op_sel_hi:[0,0,1]
+// GFX11: [0x05,0x40,0x20,0xcc,0x7c,0xfe,0xc0,0x03]
+
+v_fma_mix_f32 v5, -1, -|m0|, -1 op_sel:[1,0,0] op_sel_hi:[0,1,0]
+// GFX11: [0x05,0x0a,0x20,0xcc,0xc1,0xfa,0x04,0x53]
+
+v_fma_mix_f32 v5, 0.5, -|vcc_lo|, -|exec_hi| op_sel:[0,1,0] op_sel_hi:[1,0,0]
+// GFX11: [0x05,0x16,0x20,0xcc,0xf0,0xd4,0xfc,0xc9]
+
+v_fma_mix_f32 v255, -|src_scc|, -|vcc_hi|, null op_sel:[0,0,1] op_sel_hi:[0,0,0] clamp
+// GFX11: [0xff,0xa3,0x20,0xcc,0xfd,0xd6,0xf0,0x61]
+
+v_fma_mixhi_f16 v5, v1, v2, s3
+// GFX11: [0x05,0x00,0x22,0xcc,0x01,0x05,0x0e,0x00]
+
+v_fma_mixhi_f16 v5, v255, v255, s105
+// GFX11: [0x05,0x00,0x22,0xcc,0xff,0xff,0xa7,0x01]
+
+v_fma_mixhi_f16 v5, s1, s2, v3
+// GFX11: [0x05,0x00,0x22,0xcc,0x01,0x04,0x0c,0x04]
+
+v_fma_mixhi_f16 v5, s105, s105, m0
+// GFX11: [0x05,0x00,0x22,0xcc,0x69,0xd2,0xf4,0x01]
+
+v_fma_mixhi_f16 v5, vcc_lo, ttmp15, ttmp15
+// GFX11: [0x05,0x00,0x22,0xcc,0x6a,0xf6,0xec,0x01]
+
+v_fma_mixhi_f16 v5, vcc_hi, src_scc, v255
+// GFX11: [0x05,0x00,0x22,0xcc,0x6b,0xfa,0xfd,0x07]
+
+v_fma_mixhi_f16 v5, |ttmp15|, 0.5, -vcc_hi
+// GFX11: [0x05,0x01,0x22,0xcc,0x7b,0xe0,0xad,0x81]
+
+v_fma_mixhi_f16 v5, -m0, -1, |vcc_lo|
+// GFX11: [0x05,0x04,0x22,0xcc,0x7d,0x82,0xa9,0x21]
+
+v_fma_mixhi_f16 v5, -|exec_lo|, null, -|src_scc|
+// GFX11: [0x05,0x05,0x22,0xcc,0x7e,0xf8,0xf4,0xa3]
+
+v_fma_mixhi_f16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| op_sel:[1,1,1] op_sel_hi:[1,1,1]
+// GFX11: [0x05,0x7f,0x22,0xcc,0x7f,0xfc,0xf8,0xf9]
+
+v_fma_mixhi_f16 v5, null, exec_hi, 0.5 op_sel:[0,0,0] op_sel_hi:[0,0,1]
+// GFX11: [0x05,0x40,0x22,0xcc,0x7c,0xfe,0xc0,0x03]
+
+v_fma_mixhi_f16 v5, -1, -|m0|, -1 op_sel:[1,0,0] op_sel_hi:[0,1,0]
+// GFX11: [0x05,0x0a,0x22,0xcc,0xc1,0xfa,0x04,0x53]
+
+v_fma_mixhi_f16 v5, 0.5, -|vcc_lo|, -|exec_hi| op_sel:[0,1,0] op_sel_hi:[1,0,0]
+// GFX11: [0x05,0x16,0x22,0xcc,0xf0,0xd4,0xfc,0xc9]
+
+v_fma_mixhi_f16 v255, -|src_scc|, -|vcc_hi|, null op_sel:[0,0,1] op_sel_hi:[0,0,0] clamp
+// GFX11: [0xff,0xa3,0x22,0xcc,0xfd,0xd6,0xf0,0x61]
+
+v_fma_mixlo_f16 v5, v1, v2, s3
+// GFX11: [0x05,0x00,0x21,0xcc,0x01,0x05,0x0e,0x00]
+
+v_fma_mixlo_f16 v5, v255, v255, s105
+// GFX11: [0x05,0x00,0x21,0xcc,0xff,0xff,0xa7,0x01]
+
+v_fma_mixlo_f16 v5, s1, s2, v3
+// GFX11: [0x05,0x00,0x21,0xcc,0x01,0x04,0x0c,0x04]
+
+v_fma_mixlo_f16 v5, s105, s105, m0
+// GFX11: [0x05,0x00,0x21,0xcc,0x69,0xd2,0xf4,0x01]
+
+v_fma_mixlo_f16 v5, vcc_lo, ttmp15, ttmp15
+// GFX11: [0x05,0x00,0x21,0xcc,0x6a,0xf6,0xec,0x01]
+
+v_fma_mixlo_f16 v5, vcc_hi, src_scc, v255
+// GFX11: [0x05,0x00,0x21,0xcc,0x6b,0xfa,0xfd,0x07]
+
+v_fma_mixlo_f16 v5, |ttmp15|, 0.5, -vcc_hi
+// GFX11: [0x05,0x01,0x21,0xcc,0x7b,0xe0,0xad,0x81]
+
+v_fma_mixlo_f16 v5, -m0, -1, |vcc_lo|
+// GFX11: [0x05,0x04,0x21,0xcc,0x7d,0x82,0xa9,0x21]
+
+v_fma_mixlo_f16 v5, -|exec_lo|, null, -|src_scc|
+// GFX11: [0x05,0x05,0x21,0xcc,0x7e,0xf8,0xf4,0xa3]
+
+v_fma_mixlo_f16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| op_sel:[1,1,1] op_sel_hi:[1,1,1]
+// GFX11: [0x05,0x7f,0x21,0xcc,0x7f,0xfc,0xf8,0xf9]
+
+v_fma_mixlo_f16 v5, null, exec_hi, 0.5 op_sel:[0,0,0] op_sel_hi:[0,0,1]
+// GFX11: [0x05,0x40,0x21,0xcc,0x7c,0xfe,0xc0,0x03]
+
+v_fma_mixlo_f16 v5, -1, -|m0|, -1 op_sel:[1,0,0] op_sel_hi:[0,1,0]
+// GFX11: [0x05,0x0a,0x21,0xcc,0xc1,0xfa,0x04,0x53]
+
+v_fma_mixlo_f16 v5, 0.5, -|vcc_lo|, -|exec_hi| op_sel:[0,1,0] op_sel_hi:[1,0,0]
+// GFX11: [0x05,0x16,0x21,0xcc,0xf0,0xd4,0xfc,0xc9]
+
+v_fma_mixlo_f16 v255, -|src_scc|, -|vcc_hi|, null op_sel:[0,0,1] op_sel_hi:[0,0,0] clamp
+// GFX11: [0xff,0xa3,0x21,0xcc,0xfd,0xd6,0xf0,0x61]
+
+v_pk_add_f16 v5, v1, v2
+// GFX11: [0x05,0x40,0x0f,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_add_f16 v5, v255, v255
+// GFX11: [0x05,0x40,0x0f,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_add_f16 v5, s1, s2
+// GFX11: [0x05,0x40,0x0f,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_add_f16 v5, s105, s105
+// GFX11: [0x05,0x40,0x0f,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_add_f16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x0f,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_add_f16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x0f,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_add_f16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x0f,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_add_f16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x0f,0xcc,0x7d,0xe0,0x01,0x18]
+
+v_pk_add_f16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x0f,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_add_f16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x0f,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_add_f16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x0f,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_add_f16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0]
+// GFX11: [0x05,0x59,0x0f,0xcc,0xc1,0xfe,0x00,0x20]
+
+v_pk_add_f16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1] neg_lo:[0,1] neg_hi:[0,1]
+// GFX11: [0x05,0x42,0x0f,0xcc,0xf0,0xfa,0x00,0x58]
+
+v_pk_add_f16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[0,0] neg_hi:[0,0]
+// GFX11: [0x05,0x48,0x0f,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_add_f16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp
+// GFX11: [0xff,0xd3,0x0f,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00]
+
+v_pk_add_i16 v5, v1, v2
+// GFX11: [0x05,0x40,0x02,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_add_i16 v5, v255, v255
+// GFX11: [0x05,0x40,0x02,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_add_i16 v5, s1, s2
+// GFX11: [0x05,0x40,0x02,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_add_i16 v5, s105, s105
+// GFX11: [0x05,0x40,0x02,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_add_i16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x02,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_add_i16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x02,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_add_i16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x02,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_add_i16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x02,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_add_i16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x02,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_add_i16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x02,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_add_i16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x02,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_add_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: [0x05,0x58,0x02,0xcc,0xc1,0xfe,0x00,0x00]
+
+v_pk_add_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x02,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_add_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: [0x05,0x48,0x02,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_add_i16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] clamp
+// GFX11: [0xff,0xd0,0x02,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00]
+
+v_pk_add_u16 v5, v1, v2
+// GFX11: [0x05,0x40,0x0a,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_add_u16 v5, v255, v255
+// GFX11: [0x05,0x40,0x0a,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_add_u16 v5, s1, s2
+// GFX11: [0x05,0x40,0x0a,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_add_u16 v5, s105, s105
+// GFX11: [0x05,0x40,0x0a,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_add_u16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x0a,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_add_u16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x0a,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_add_u16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x0a,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_add_u16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x0a,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_add_u16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x0a,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_add_u16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x0a,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_add_u16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x0a,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_add_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: [0x05,0x58,0x0a,0xcc,0xc1,0xfe,0x00,0x00]
+
+v_pk_add_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x0a,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_add_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: [0x05,0x48,0x0a,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_add_u16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] clamp
+// GFX11: [0xff,0xd0,0x0a,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00]
+
+v_pk_ashrrev_i16 v5, v1, v2
+// GFX11: [0x05,0x40,0x06,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_ashrrev_i16 v5, v255, v255
+// GFX11: [0x05,0x40,0x06,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_ashrrev_i16 v5, s1, s2
+// GFX11: [0x05,0x40,0x06,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_ashrrev_i16 v5, s105, s105
+// GFX11: [0x05,0x40,0x06,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_ashrrev_i16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x06,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_ashrrev_i16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x06,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_ashrrev_i16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x06,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_ashrrev_i16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x06,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_ashrrev_i16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x06,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_ashrrev_i16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x06,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_ashrrev_i16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x06,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_ashrrev_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: [0x05,0x58,0x06,0xcc,0xc1,0xfe,0x00,0x00]
+
+v_pk_ashrrev_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x06,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_ashrrev_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: [0x05,0x48,0x06,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_ashrrev_i16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0]
+// GFX11: [0xff,0x50,0x06,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00]
+
+v_pk_fma_f16 v5, v1, v2, s3
+// GFX11: [0x05,0x40,0x0e,0xcc,0x01,0x05,0x0e,0x18]
+
+v_pk_fma_f16 v5, v255, s2, s105
+// GFX11: [0x05,0x40,0x0e,0xcc,0xff,0x05,0xa4,0x19]
+
+v_pk_fma_f16 v5, s1, v255, exec_hi
+// GFX11: [0x05,0x40,0x0e,0xcc,0x01,0xfe,0xff,0x19]
+
+v_pk_fma_f16 v5, s105, s105, exec_lo
+// GFX11: [0x05,0x40,0x0e,0xcc,0x69,0xd2,0xf8,0x19]
+
+v_pk_fma_f16 v5, vcc_lo, ttmp15, v3
+// GFX11: [0x05,0x40,0x0e,0xcc,0x6a,0xf6,0x0c,0x1c]
+
+v_pk_fma_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: [0x05,0x40,0x0e,0xcc,0x6b,0xfe,0xfd,0x1f,0x0b,0xfe,0x00,0x00]
+
+v_pk_fma_f16 v5, ttmp15, src_scc, ttmp15
+// GFX11: [0x05,0x40,0x0e,0xcc,0x7b,0xfa,0xed,0x19]
+
+v_pk_fma_f16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0]
+// GFX11: [0x05,0x00,0x0e,0xcc,0x7d,0xe0,0xf5,0x01]
+
+v_pk_fma_f16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1]
+// GFX11: [0x05,0x40,0x0e,0xcc,0x7e,0x82,0xad,0x01]
+
+v_pk_fma_f16 v5, exec_hi, null, vcc_lo op_sel_hi:[0,1,0]
+// GFX11: [0x05,0x00,0x0e,0xcc,0x7f,0xf8,0xa8,0x11]
+
+v_pk_fma_f16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1] op_sel_hi:[1,0,0] neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX11: [0x05,0x39,0x0e,0xcc,0x7c,0xfc,0xfc,0x2b,0x0b,0xfe,0x00,0x00]
+
+v_pk_fma_f16 v5, -1, exec_hi, src_scc op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX11: [0x05,0x42,0x0e,0xcc,0xc1,0xfe,0xf4,0x5b]
+
+v_pk_fma_f16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+// GFX11: [0x05,0x4c,0x0e,0xcc,0xf0,0xfa,0xc0,0x93]
+
+v_pk_fma_f16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
+// GFX11: [0x05,0x50,0x0e,0xcc,0xfd,0xd4,0x04,0x0b]
+
+v_pk_fma_f16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,1] op_sel_hi:[1,1,0] neg_lo:[1,1,1] neg_hi:[1,1,1] clamp
+// GFX11: [0xff,0xa7,0x0e,0xcc,0xff,0xd6,0xf0,0xf9,0x0b,0xfe,0x00,0x00]
+
+v_pk_lshlrev_b16 v5, v1, v2
+// GFX11: [0x05,0x40,0x04,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_lshlrev_b16 v5, v255, v255
+// GFX11: [0x05,0x40,0x04,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_lshlrev_b16 v5, s1, s2
+// GFX11: [0x05,0x40,0x04,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_lshlrev_b16 v5, s105, s105
+// GFX11: [0x05,0x40,0x04,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_lshlrev_b16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x04,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_lshlrev_b16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x04,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_lshlrev_b16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x04,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_lshlrev_b16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x04,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_lshlrev_b16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x04,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_lshlrev_b16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x04,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_lshlrev_b16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x04,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_lshlrev_b16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: [0x05,0x58,0x04,0xcc,0xc1,0xfe,0x00,0x00]
+
+v_pk_lshlrev_b16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x04,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_lshlrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: [0x05,0x48,0x04,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_lshlrev_b16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0]
+// GFX11: [0xff,0x50,0x04,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00]
+
+v_pk_lshrrev_b16 v5, v1, v2
+// GFX11: [0x05,0x40,0x05,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_lshrrev_b16 v5, v255, v255
+// GFX11: [0x05,0x40,0x05,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_lshrrev_b16 v5, s1, s2
+// GFX11: [0x05,0x40,0x05,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_lshrrev_b16 v5, s105, s105
+// GFX11: [0x05,0x40,0x05,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_lshrrev_b16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x05,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_lshrrev_b16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x05,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_lshrrev_b16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x05,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_lshrrev_b16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x05,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_lshrrev_b16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x05,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_lshrrev_b16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x05,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_lshrrev_b16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x05,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_lshrrev_b16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: [0x05,0x58,0x05,0xcc,0xc1,0xfe,0x00,0x00]
+
+v_pk_lshrrev_b16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x05,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_lshrrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: [0x05,0x48,0x05,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_lshrrev_b16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0]
+// GFX11: [0xff,0x50,0x05,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00]
+
+v_pk_mad_i16 v5, v1, v2, s3
+// GFX11: [0x05,0x40,0x00,0xcc,0x01,0x05,0x0e,0x18]
+
+v_pk_mad_i16 v5, v255, s2, s105
+// GFX11: [0x05,0x40,0x00,0xcc,0xff,0x05,0xa4,0x19]
+
+v_pk_mad_i16 v5, s1, v255, exec_hi
+// GFX11: [0x05,0x40,0x00,0xcc,0x01,0xfe,0xff,0x19]
+
+v_pk_mad_i16 v5, s105, s105, exec_lo
+// GFX11: [0x05,0x40,0x00,0xcc,0x69,0xd2,0xf8,0x19]
+
+v_pk_mad_i16 v5, vcc_lo, ttmp15, v3
+// GFX11: [0x05,0x40,0x00,0xcc,0x6a,0xf6,0x0c,0x1c]
+
+v_pk_mad_i16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: [0x05,0x40,0x00,0xcc,0x6b,0xfe,0xfd,0x1f,0x0b,0xfe,0x00,0x00]
+
+v_pk_mad_i16 v5, ttmp15, src_scc, ttmp15
+// GFX11: [0x05,0x40,0x00,0xcc,0x7b,0xfa,0xed,0x19]
+
+v_pk_mad_i16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0]
+// GFX11: [0x05,0x00,0x00,0xcc,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00]
+
+v_pk_mad_i16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1]
+// GFX11: [0x05,0x40,0x00,0xcc,0x7e,0x82,0xad,0x01]
+
+v_pk_mad_i16 v5, exec_hi, null, vcc_lo op_sel_hi:[0,1,0]
+// GFX11: [0x05,0x00,0x00,0xcc,0x7f,0xf8,0xa8,0x11]
+
+v_pk_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1] op_sel_hi:[1,0,0]
+// GFX11: [0x05,0x38,0x00,0xcc,0x7c,0xfc,0xfc,0x0b,0x0b,0xfe,0x00,0x00]
+
+v_pk_mad_i16 v5, -1, exec_hi, src_scc op_sel:[0,0,0] op_sel_hi:[1,1,1]
+// GFX11: [0x05,0x40,0x00,0xcc,0xc1,0xfe,0xf4,0x1b]
+
+v_pk_mad_i16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1]
+// GFX11: [0x05,0x48,0x00,0xcc,0xff,0xfa,0xfc,0x13,0x00,0x38,0x00,0x00]
+
+v_pk_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1]
+// GFX11: [0x05,0x50,0x00,0xcc,0xfd,0xd4,0x04,0x0b]
+
+v_pk_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,1] op_sel_hi:[1,1,0] clamp
+// GFX11: [0xff,0xa0,0x00,0xcc,0xff,0xd6,0xf0,0x19,0x0b,0xfe,0x00,0x00]
+
+v_pk_mad_u16 v5, v1, v2, s3
+// GFX11: [0x05,0x40,0x09,0xcc,0x01,0x05,0x0e,0x18]
+
+v_pk_mad_u16 v5, v255, s2, s105
+// GFX11: [0x05,0x40,0x09,0xcc,0xff,0x05,0xa4,0x19]
+
+v_pk_mad_u16 v5, s1, v255, exec_hi
+// GFX11: [0x05,0x40,0x09,0xcc,0x01,0xfe,0xff,0x19]
+
+v_pk_mad_u16 v5, s105, s105, exec_lo
+// GFX11: [0x05,0x40,0x09,0xcc,0x69,0xd2,0xf8,0x19]
+
+v_pk_mad_u16 v5, vcc_lo, ttmp15, v3
+// GFX11: [0x05,0x40,0x09,0xcc,0x6a,0xf6,0x0c,0x1c]
+
+v_pk_mad_u16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: [0x05,0x40,0x09,0xcc,0x6b,0xfe,0xfd,0x1f,0x0b,0xfe,0x00,0x00]
+
+v_pk_mad_u16 v5, ttmp15, src_scc, ttmp15
+// GFX11: [0x05,0x40,0x09,0xcc,0x7b,0xfa,0xed,0x19]
+
+v_pk_mad_u16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0]
+// GFX11: [0x05,0x00,0x09,0xcc,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00]
+
+v_pk_mad_u16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1]
+// GFX11: [0x05,0x40,0x09,0xcc,0x7e,0x82,0xad,0x01]
+
+v_pk_mad_u16 v5, exec_hi, null, vcc_lo op_sel_hi:[0,1,0]
+// GFX11: [0x05,0x00,0x09,0xcc,0x7f,0xf8,0xa8,0x11]
+
+v_pk_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1] op_sel_hi:[1,0,0]
+// GFX11: [0x05,0x38,0x09,0xcc,0x7c,0xfc,0xfc,0x0b,0x0b,0xfe,0x00,0x00]
+
+v_pk_mad_u16 v5, -1, exec_hi, src_scc op_sel:[0,0,0] op_sel_hi:[1,1,1]
+// GFX11: [0x05,0x40,0x09,0xcc,0xc1,0xfe,0xf4,0x1b]
+
+v_pk_mad_u16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1]
+// GFX11: [0x05,0x48,0x09,0xcc,0xff,0xfa,0xfc,0x13,0x00,0x38,0x00,0x00]
+
+v_pk_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1]
+// GFX11: [0x05,0x50,0x09,0xcc,0xfd,0xd4,0x04,0x0b]
+
+v_pk_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,1] op_sel_hi:[1,1,0] clamp
+// GFX11: [0xff,0xa0,0x09,0xcc,0xff,0xd6,0xf0,0x19,0x0b,0xfe,0x00,0x00]
+
+v_pk_max_f16 v5, v1, v2
+// GFX11: [0x05,0x40,0x12,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_max_f16 v5, v255, v255
+// GFX11: [0x05,0x40,0x12,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_max_f16 v5, s1, s2
+// GFX11: [0x05,0x40,0x12,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_max_f16 v5, s105, s105
+// GFX11: [0x05,0x40,0x12,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_max_f16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x12,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_max_f16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x12,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_max_f16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x12,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_max_f16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x12,0xcc,0x7d,0xe0,0x01,0x18]
+
+v_pk_max_f16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x12,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_max_f16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x12,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_max_f16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x12,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_max_f16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0]
+// GFX11: [0x05,0x59,0x12,0xcc,0xc1,0xfe,0x00,0x20]
+
+v_pk_max_f16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1] neg_lo:[0,1] neg_hi:[0,1]
+// GFX11: [0x05,0x42,0x12,0xcc,0xf0,0xfa,0x00,0x58]
+
+v_pk_max_f16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[0,0] neg_hi:[0,0]
+// GFX11: [0x05,0x48,0x12,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_max_f16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp
+// GFX11: [0xff,0xd3,0x12,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00]
+
+v_pk_max_i16 v5, v1, v2
+// GFX11: [0x05,0x40,0x07,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_max_i16 v5, v255, v255
+// GFX11: [0x05,0x40,0x07,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_max_i16 v5, s1, s2
+// GFX11: [0x05,0x40,0x07,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_max_i16 v5, s105, s105
+// GFX11: [0x05,0x40,0x07,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_max_i16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x07,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_max_i16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x07,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_max_i16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x07,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_max_i16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x07,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_max_i16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x07,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_max_i16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x07,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_max_i16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x07,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_max_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: [0x05,0x58,0x07,0xcc,0xc1,0xfe,0x00,0x00]
+
+v_pk_max_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x07,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_max_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: [0x05,0x48,0x07,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_max_i16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0]
+// GFX11: [0xff,0x50,0x07,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00]
+
+v_pk_max_u16 v5, v1, v2
+// GFX11: [0x05,0x40,0x0c,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_max_u16 v5, v255, v255
+// GFX11: [0x05,0x40,0x0c,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_max_u16 v5, s1, s2
+// GFX11: [0x05,0x40,0x0c,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_max_u16 v5, s105, s105
+// GFX11: [0x05,0x40,0x0c,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_max_u16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x0c,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_max_u16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x0c,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_max_u16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x0c,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_max_u16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x0c,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_max_u16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x0c,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_max_u16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x0c,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_max_u16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x0c,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_max_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: [0x05,0x58,0x0c,0xcc,0xc1,0xfe,0x00,0x00]
+
+v_pk_max_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x0c,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_max_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: [0x05,0x48,0x0c,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_max_u16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0]
+// GFX11: [0xff,0x50,0x0c,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00]
+
+v_pk_min_f16 v5, v1, v2
+// GFX11: [0x05,0x40,0x11,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_min_f16 v5, v255, v255
+// GFX11: [0x05,0x40,0x11,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_min_f16 v5, s1, s2
+// GFX11: [0x05,0x40,0x11,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_min_f16 v5, s105, s105
+// GFX11: [0x05,0x40,0x11,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_min_f16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x11,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_min_f16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x11,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_min_f16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x11,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_min_f16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x11,0xcc,0x7d,0xe0,0x01,0x18]
+
+v_pk_min_f16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x11,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_min_f16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x11,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_min_f16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x11,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_min_f16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0]
+// GFX11: [0x05,0x59,0x11,0xcc,0xc1,0xfe,0x00,0x20]
+
+v_pk_min_f16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1] neg_lo:[0,1] neg_hi:[0,1]
+// GFX11: [0x05,0x42,0x11,0xcc,0xf0,0xfa,0x00,0x58]
+
+v_pk_min_f16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[0,0] neg_hi:[0,0]
+// GFX11: [0x05,0x48,0x11,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_min_f16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp
+// GFX11: [0xff,0xd3,0x11,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00]
+
+v_pk_min_i16 v5, v1, v2
+// GFX11: [0x05,0x40,0x08,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_min_i16 v5, v255, v255
+// GFX11: [0x05,0x40,0x08,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_min_i16 v5, s1, s2
+// GFX11: [0x05,0x40,0x08,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_min_i16 v5, s105, s105
+// GFX11: [0x05,0x40,0x08,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_min_i16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x08,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_min_i16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x08,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_min_i16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x08,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_min_i16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x08,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_min_i16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x08,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_min_i16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x08,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_min_i16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x08,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_min_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: [0x05,0x58,0x08,0xcc,0xc1,0xfe,0x00,0x00]
+
+v_pk_min_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x08,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_min_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: [0x05,0x48,0x08,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_min_i16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0]
+// GFX11: [0xff,0x50,0x08,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00]
+
+v_pk_min_u16 v5, v1, v2
+// GFX11: [0x05,0x40,0x0d,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_min_u16 v5, v255, v255
+// GFX11: [0x05,0x40,0x0d,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_min_u16 v5, s1, s2
+// GFX11: [0x05,0x40,0x0d,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_min_u16 v5, s105, s105
+// GFX11: [0x05,0x40,0x0d,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_min_u16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x0d,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_min_u16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x0d,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_min_u16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x0d,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_min_u16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x0d,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_min_u16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x0d,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_min_u16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x0d,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_min_u16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x0d,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_min_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: [0x05,0x58,0x0d,0xcc,0xc1,0xfe,0x00,0x00]
+
+v_pk_min_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x0d,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_min_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: [0x05,0x48,0x0d,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_min_u16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0]
+// GFX11: [0xff,0x50,0x0d,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00]
+
+v_pk_mul_f16 v5, v1, v2
+// GFX11: [0x05,0x40,0x10,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_mul_f16 v5, v255, v255
+// GFX11: [0x05,0x40,0x10,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_mul_f16 v5, s1, s2
+// GFX11: [0x05,0x40,0x10,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_mul_f16 v5, s105, s105
+// GFX11: [0x05,0x40,0x10,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_mul_f16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x10,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_mul_f16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x10,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_mul_f16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x10,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_mul_f16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x10,0xcc,0x7d,0xe0,0x01,0x18]
+
+v_pk_mul_f16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x10,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_mul_f16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x10,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_mul_f16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x10,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_mul_f16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0]
+// GFX11: [0x05,0x59,0x10,0xcc,0xc1,0xfe,0x00,0x20]
+
+v_pk_mul_f16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1] neg_lo:[0,1] neg_hi:[0,1]
+// GFX11: [0x05,0x42,0x10,0xcc,0xf0,0xfa,0x00,0x58]
+
+v_pk_mul_f16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[0,0] neg_hi:[0,0]
+// GFX11: [0x05,0x48,0x10,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_mul_f16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp
+// GFX11: [0xff,0xd3,0x10,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00]
+
+v_pk_mul_lo_u16 v5, v1, v2
+// GFX11: [0x05,0x40,0x01,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_mul_lo_u16 v5, v255, v255
+// GFX11: [0x05,0x40,0x01,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_mul_lo_u16 v5, s1, s2
+// GFX11: [0x05,0x40,0x01,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_mul_lo_u16 v5, s105, s105
+// GFX11: [0x05,0x40,0x01,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_mul_lo_u16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x01,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_mul_lo_u16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x01,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_mul_lo_u16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x01,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_mul_lo_u16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x01,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_mul_lo_u16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x01,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_mul_lo_u16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x01,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_mul_lo_u16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x01,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_mul_lo_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: [0x05,0x58,0x01,0xcc,0xc1,0xfe,0x00,0x00]
+
+v_pk_mul_lo_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x01,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_mul_lo_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: [0x05,0x48,0x01,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_mul_lo_u16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0]
+// GFX11: [0xff,0x50,0x01,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00]
+
+v_pk_sub_i16 v5, v1, v2
+// GFX11: [0x05,0x40,0x03,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_sub_i16 v5, v255, v255
+// GFX11: [0x05,0x40,0x03,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_sub_i16 v5, s1, s2
+// GFX11: [0x05,0x40,0x03,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_sub_i16 v5, s105, s105
+// GFX11: [0x05,0x40,0x03,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_sub_i16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x03,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_sub_i16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x03,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_sub_i16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x03,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_sub_i16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x03,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_sub_i16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x03,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_sub_i16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x03,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_sub_i16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x03,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_sub_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: [0x05,0x58,0x03,0xcc,0xc1,0xfe,0x00,0x00]
+
+v_pk_sub_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x03,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_sub_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: [0x05,0x48,0x03,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_sub_i16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] clamp
+// GFX11: [0xff,0xd0,0x03,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00]
+
+v_pk_sub_u16 v5, v1, v2
+// GFX11: [0x05,0x40,0x0b,0xcc,0x01,0x05,0x02,0x18]
+
+v_pk_sub_u16 v5, v255, v255
+// GFX11: [0x05,0x40,0x0b,0xcc,0xff,0xff,0x03,0x18]
+
+v_pk_sub_u16 v5, s1, s2
+// GFX11: [0x05,0x40,0x0b,0xcc,0x01,0x04,0x00,0x18]
+
+v_pk_sub_u16 v5, s105, s105
+// GFX11: [0x05,0x40,0x0b,0xcc,0x69,0xd2,0x00,0x18]
+
+v_pk_sub_u16 v5, vcc_lo, ttmp15
+// GFX11: [0x05,0x40,0x0b,0xcc,0x6a,0xf6,0x00,0x18]
+
+v_pk_sub_u16 v5, vcc_hi, 0xfe0b
+// GFX11: [0x05,0x40,0x0b,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+
+v_pk_sub_u16 v5, ttmp15, src_scc
+// GFX11: [0x05,0x40,0x0b,0xcc,0x7b,0xfa,0x01,0x18]
+
+v_pk_sub_u16 v5, m0, 0.5
+// GFX11: [0x05,0x40,0x0b,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_sub_u16 v5, exec_lo, -1
+// GFX11: [0x05,0x40,0x0b,0xcc,0x7e,0x82,0x01,0x18]
+
+v_pk_sub_u16 v5, exec_hi, null
+// GFX11: [0x05,0x40,0x0b,0xcc,0x7f,0xf8,0x00,0x18]
+
+v_pk_sub_u16 v5, null, exec_lo
+// GFX11: [0x05,0x40,0x0b,0xcc,0x7c,0xfc,0x00,0x18]
+
+v_pk_sub_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: [0x05,0x58,0x0b,0xcc,0xc1,0xfe,0x00,0x00]
+
+v_pk_sub_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x0b,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+
+v_pk_sub_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: [0x05,0x48,0x0b,0xcc,0xfd,0xd4,0x00,0x10]
+
+v_pk_sub_u16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] clamp
+// GFX11: [0xff,0xd0,0x0b,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3p_features.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p_features.s
new file mode 100644
index 000000000000..b1458c5100d8
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p_features.s
@@ -0,0 +1,143 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1100 %s | FileCheck --check-prefix=GFX11 %s
+
+//
+// Test op_sel/op_sel_hi
+//
+
+v_pk_add_u16 v1, v2, v3
+// GFX11: encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[0,0]
+// GFX11: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,1]
+// GFX11: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0]
+// GFX11: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x00]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX11: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x00]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[1,0]
+// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] ; encoding: [0x01,0x48,0x0a,0xcc,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[0,1]
+// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] ; encoding: [0x01,0x50,0x0a,0xcc,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[1,1]
+// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x58,0x0a,0xcc,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1]
+// GFX11: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1] ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x10]
+
+v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0]
+// GFX11: v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0] ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x08]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x58,0x0a,0xcc,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] ; encoding: [0x01,0x48,0x0a,0xcc,0x02,0x07,0x02,0x08]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] ; encoding: [0x01,0x50,0x0a,0xcc,0x02,0x07,0x02,0x10]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x01,0x48,0x0a,0xcc,0x02,0x07,0x02,0x10]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] ; encoding: [0x01,0x50,0x0a,0xcc,0x02,0x07,0x02,0x08]
+
+//
+// Test src2 op_sel/op_sel_hi
+//
+
+v_pk_fma_f16 v8, v0, s0, v1
+// GFX11: encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x0e,0xcc,0x00,0x01,0x04,0x04]
+
+v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x0e,0xcc,0x00,0x01,0x04,0x04]
+
+//
+// Test neg_lo/neg_hi
+//
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0xfc]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,1,1]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] neg_hi:[1,1,1]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x0e,0xcc,0x00,0x01,0x04,0xfc]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,0,0]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x3c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,1,0]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x5c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,1]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x9c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,0,0]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,1,0]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,0,1]
+// GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x0e,0xcc,0x00,0x01,0x04,0x1c]
+
+//
+// DOT
+//
+
+v_dot4_i32_iu8 v3, v4, v5, v6
+// GFX11: v_dot4_i32_iu8 v3, v4, v5, v6 ; encoding: [0x03,0x40,0x16,0xcc,0x04,0x0b,0x1a,0x1c]
+
+v_dot4_i32_iu8 v3, v4, v5, 0xf neg_lo:[1,1]
+// GFX11: v_dot4_i32_iu8 v3, v4, v5, 15 neg_lo:[1,1,0] ; encoding: [0x03,0x40,0x16,0xcc,0x04,0x0b,0x3e,0x7a]
+
+v_dot4_u32_u8 v3, v4, v5, v6
+// GFX11: v_dot4_u32_u8 v3, v4, v5, v6 ; encoding: [0x03,0x40,0x17,0xcc,0x04,0x0b,0x1a,0x1c]
+
+v_dot4_i32_iu8 v3, v4, v5, 0xf
+// GFX11: v_dot4_i32_iu8 v3, v4, v5, 15 ; encoding: [0x03,0x40,0x16,0xcc,0x04,0x0b,0x3e,0x1a]
+
+v_dot8_i32_iu4 v3, v4, v5, 0xf neg_lo:[1,0]
+// GFX11: v_dot8_i32_iu4 v3, v4, v5, 15 neg_lo:[1,0,0] ; encoding: [0x03,0x40,0x18,0xcc,0x04,0x0b,0x3e,0x3a]
+
+v_dot8_i32_iu4 v3, v4, v5, v0 neg_lo:[0,0]
+// GFX11: v_dot8_i32_iu4 v3, v4, v5, v0 ; encoding: [0x03,0x40,0x18,0xcc,0x04,0x0b,0x02,0x1c]
+
+v_dot8_u32_u4 v0, v1, v2, v3
+// GFX11: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c]
+
+v_dot2_f32_f16 v0, v1, v2, v3
+// GFX11: v_dot2_f32_f16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x13,0xcc,0x01,0x05,0x0e,0x1c]
+
+v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1]
+// GFX11: v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] ; encoding: [0x00,0x45,0x13,0xcc,0x01,0x05,0x0e,0x7c]
+
+v_dot2_f32_bf16 v0, v1, v2, v3
+// GFX11: v_dot2_f32_bf16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x1a,0xcc,0x01,0x05,0x0e,0x1c]
+
+v_dot2_f32_bf16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1]
+// GFX11: v_dot2_f32_bf16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] ; encoding: [0x00,0x45,0x1a,0xcc,0x01,0x05,0x0e,0x7c]
More information about the llvm-commits
mailing list