[llvm] [AMDGPU][True16][MC] true16 for v_cmpx_xx_u/i16 (PR #123424)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 19 21:53:58 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

<details>
<summary>Changes</summary>

A bulk commit of true16 support for v_cmp_xx_i/u16 instructions including:

v_cmpx_lt_i16
v_cmpx_eq_i16
v_cmpx_le_i16
v_cmpx_gt_i16
v_cmpx_ne_i16
v_cmpx_ge_i16
v_cmpx_lt_u16
v_cmpx_eq_u16
v_cmpx_le_u16
v_cmpx_gt_u16
v_cmpx_ne_u16
v_cmpx_ge_u16

---

Patch is 1.43 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123424.diff


33 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/VOPCInstructions.td (+12-12) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s (+444-336) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s (+180-72) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s (+120-48) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s (+1-1) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s (+1-1) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s (+540-360) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s (+444-336) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s (+180-72) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s (+360-144) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s (+360-144) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s (+120-48) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s (+492-384) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s (+228-120) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s (+1-1) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s (+1-1) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s (+504-360) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s (+408-336) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s (+144-72) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s (+360-144) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s (+360-144) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vopcx.txt (+480-168) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vopcx.txt (+192-24) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vopcx.txt (+144-24) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt (+600-180) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt (+480-168) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt (+264-24) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx.txt (+168-24) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp16.txt () 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp8.txt () 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt (+552-180) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt (+432-168) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt (+144-24) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index bba8aa570d2b58..ad59093c97ecce 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1975,18 +1975,18 @@ defm V_CMPX_NEQ_F64   : VOPCX_Real_gfx11_gfx12<0x0ad>;
 defm V_CMPX_NLT_F64   : VOPCX_Real_gfx11_gfx12<0x0ae>;
 defm V_CMPX_T_F64     : VOPCX_Real_with_name_gfx11<0x0af, "V_CMPX_TRU_F64", "v_cmpx_t_f64">;
 
-defm V_CMPX_LT_I16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x0b1, "v_cmpx_lt_i16">;
-defm V_CMPX_EQ_I16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x0b2, "v_cmpx_eq_i16">;
-defm V_CMPX_LE_I16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x0b3, "v_cmpx_le_i16">;
-defm V_CMPX_GT_I16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x0b4, "v_cmpx_gt_i16">;
-defm V_CMPX_NE_I16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x0b5, "v_cmpx_ne_i16">;
-defm V_CMPX_GE_I16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x0b6, "v_cmpx_ge_i16">;
-defm V_CMPX_LT_U16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x0b9, "v_cmpx_lt_u16">;
-defm V_CMPX_EQ_U16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x0ba, "v_cmpx_eq_u16">;
-defm V_CMPX_LE_U16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x0bb, "v_cmpx_le_u16">;
-defm V_CMPX_GT_U16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x0bc, "v_cmpx_gt_u16">;
-defm V_CMPX_NE_U16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x0bd, "v_cmpx_ne_u16">;
-defm V_CMPX_GE_U16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x0be, "v_cmpx_ge_u16">;
+defm V_CMPX_LT_I16    : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0b1, "v_cmpx_lt_i16">;
+defm V_CMPX_EQ_I16    : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0b2, "v_cmpx_eq_i16">;
+defm V_CMPX_LE_I16    : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0b3, "v_cmpx_le_i16">;
+defm V_CMPX_GT_I16    : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0b4, "v_cmpx_gt_i16">;
+defm V_CMPX_NE_I16    : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0b5, "v_cmpx_ne_i16">;
+defm V_CMPX_GE_I16    : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0b6, "v_cmpx_ge_i16">;
+defm V_CMPX_LT_U16    : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0b9, "v_cmpx_lt_u16">;
+defm V_CMPX_EQ_U16    : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0ba, "v_cmpx_eq_u16">;
+defm V_CMPX_LE_U16    : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0bb, "v_cmpx_le_u16">;
+defm V_CMPX_GT_U16    : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0bc, "v_cmpx_gt_u16">;
+defm V_CMPX_NE_U16    : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0bd, "v_cmpx_ne_u16">;
+defm V_CMPX_GE_U16    : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0be, "v_cmpx_ge_u16">;
 
 defm V_CMPX_F_I32     : VOPCX_Real_gfx11<0x0c0>;
 defm V_CMPX_LT_I32    : VOPCX_Real_gfx11_gfx12<0x0c1>;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
index 60ec94446235ed..b7efd987e14902 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
@@ -171,47 +171,56 @@ v_cmpx_eq_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
 v_cmpx_eq_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmpx_eq_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x92,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmpx_eq_i16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmpx_eq_i16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
-v_cmpx_eq_i16_e64_dpp v1, v2 row_mirror
-// GFX11: v_cmpx_eq_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_mirror
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
-v_cmpx_eq_i16_e64_dpp v1, v2 row_half_mirror
-// GFX11: v_cmpx_eq_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_half_mirror
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
-v_cmpx_eq_i16_e64_dpp v1, v2 row_shl:1
-// GFX11: v_cmpx_eq_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_shl:1
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
-v_cmpx_eq_i16_e64_dpp v1, v2 row_shl:15
-// GFX11: v_cmpx_eq_i16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_shl:15
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
-v_cmpx_eq_i16_e64_dpp v1, v2 row_shr:1
-// GFX11: v_cmpx_eq_i16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_shr:1
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
-v_cmpx_eq_i16_e64_dpp v1, v2 row_shr:15
-// GFX11: v_cmpx_eq_i16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_shr:15
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
-v_cmpx_eq_i16_e64_dpp v1, v2 row_ror:1
-// GFX11: v_cmpx_eq_i16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_ror:1
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
-v_cmpx_eq_i16_e64_dpp v1, v2 row_ror:15
-// GFX11: v_cmpx_eq_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_ror:15
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
-v_cmpx_eq_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_cmpx_eq_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
-v_cmpx_eq_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_cmpx_eq_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
 
-v_cmpx_eq_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_cmpx_eq_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
 
-v_cmpx_eq_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmpx_eq_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_cmpx_eq_i16_e64_dpp v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_eq_i16_e64_dpp v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_eq_i16_e64_dpp v1.h, v2.h row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.h, v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x18,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_i16_e64_dpp v1.h, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_eq_i16_e64_dpp v1.h, v2.l op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x08,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_i16_e64_dpp v255.l, v255.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_eq_i16_e64_dpp v255.l, v255.h op_sel:[0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x10,0xb2,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
 v_cmpx_eq_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_eq_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -255,47 +264,56 @@ v_cmpx_eq_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
 v_cmpx_eq_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmpx_eq_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmpx_eq_u16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_mirror
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
-v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_half_mirror
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
-v_cmpx_eq_u16_e64_dpp v1, v2 row_mirror
-// GFX11: v_cmpx_eq_u16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_shl:1
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
-v_cmpx_eq_u16_e64_dpp v1, v2 row_half_mirror
-// GFX11: v_cmpx_eq_u16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_shl:15
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
-v_cmpx_eq_u16_e64_dpp v1, v2 row_shl:1
-// GFX11: v_cmpx_eq_u16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_shr:1
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
-v_cmpx_eq_u16_e64_dpp v1, v2 row_shl:15
-// GFX11: v_cmpx_eq_u16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_shr:15
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
-v_cmpx_eq_u16_e64_dpp v1, v2 row_shr:1
-// GFX11: v_cmpx_eq_u16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_ror:1
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
-v_cmpx_eq_u16_e64_dpp v1, v2 row_shr:15
-// GFX11: v_cmpx_eq_u16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_ror:15
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
-v_cmpx_eq_u16_e64_dpp v1, v2 row_ror:1
-// GFX11: v_cmpx_eq_u16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
-v_cmpx_eq_u16_e64_dpp v1, v2 row_ror:15
-// GFX11: v_cmpx_eq_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
 
-v_cmpx_eq_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_cmpx_eq_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
 
-v_cmpx_eq_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_cmpx_eq_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_cmpx_eq_u16_e64_dpp v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_eq_u16_e64_dpp v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
-v_cmpx_eq_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_cmpx_eq_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_cmpx_eq_u16_e64_dpp v1.h, v2.h row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.h, v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x18,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
 
-v_cmpx_eq_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmpx_eq_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+v_cmpx_eq_u16_e64_dpp v1.h, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_eq_u16_e64_dpp v1.h, v2.l op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x08,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_u16_e64_dpp v255.l, v255.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_eq_u16_e64_dpp v255.l, v255.h op_sel:[0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x10,0xba,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
 
 v_cmpx_eq_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_eq_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -591,47 +609,56 @@ v_cmpx_ge_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
 v_cmpx_ge_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmpx_ge_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x96,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmpx_ge_i16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ge_i16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cmpx_ge_i16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1.l, v2.l row_mirror
+// GFX11: v_cmpx_ge_i16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1.l, v2.l row_half_mirror
+// GFX11: v_cmpx_ge_i16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
-v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmpx_ge_i16_e64_dpp v1.l, v2.l row_shl:1
+// GFX11: v_cmpx_ge_i16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
-v_cmpx_ge_i16_e64_dpp v1, v2 row_mirror
-// GFX11: v_cmpx_ge_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmpx_ge_i16_e64_dpp v1.l, v2.l row_shl:15
+// GFX11: v_cmpx_ge_i16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
-v_cmpx_ge_i16_e64_dpp v1, v2 row_half_mirror
-// GFX11: v_cmpx_ge_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmpx_ge_i16_e64_dpp v1.l, v2.l row_shr:1
+// GFX11: v_cmpx_ge_i16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
-v_cmpx_ge_i16_e64_dpp v1, v2 row_shl:1
-// GFX11: v_cmpx_ge_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x0...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/123424


More information about the llvm-commits mailing list