[llvm] [AMDGPU][True16][MC] true16 for v_cmpx_xx_f16 (PR #123419)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 19 21:53:20 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Brox Chen (broxigarchen)
<details>
<summary>Changes</summary>
A bulk commit of true16 support for v_cmpx_xx_f16 instructions including:
v_cmpx_f_f16
v_cmpx_eq_f16
v_cmpx_le_f16
v_cmpx_gt_f16
v_cmpx_lg_f16
v_cmpx_ge_f16
v_cmpx_o_f16
v_cmpx_u_f16
v_cmpx_nge_f16
v_cmpx_nlg_f16
v_cmpx_ngt_f16
v_cmpx_nle_f16
v_cmpx_neq_f16
v_cmpx_nlt_f16
v_cmpx_t_f16
---
Patch is 1.65 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123419.diff
33 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/VOPCInstructions.td (+15-15)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s (+546-420)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s (+246-120)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s (+144-60)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s (+1-1)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s (+1-1)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s (+669-444)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s (+549-414)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s (+221-86)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s (+417-165)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s (+450-180)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s (+120-48)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s (+492-384)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s (+252-144)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s (+1-1)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s (+1-1)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s (+504-360)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s (+408-336)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s (+144-72)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s (+360-144)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s (+360-144)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vopcx.txt (+588-210)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vopcx.txt (+288-60)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vopcx.txt (+172-30)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt (+730-225)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt (+588-210)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt (+312-30)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx.txt (+159-26)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp16.txt ()
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp8.txt ()
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt (+582-195)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt (+460-182)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt (+148-26)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index bba8aa570d2b58..ebc2d622bb16a3 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1924,22 +1924,22 @@ defm V_CMP_CLASS_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16"
defm V_CMP_CLASS_F32 : VOPC_Real_gfx11_gfx12<0x07e>;
defm V_CMP_CLASS_F64 : VOPC_Real_gfx11_gfx12<0x07f>;
-defm V_CMPX_F_F16_fake16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
+defm V_CMPX_F_F16 : VOPCX_Real_t16_and_fake16_gfx11<0x080, "v_cmpx_f_f16">;
defm V_CMPX_LT_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
-defm V_CMPX_EQ_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
-defm V_CMPX_LE_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x083, "v_cmpx_le_f16">;
-defm V_CMPX_GT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x084, "v_cmpx_gt_f16">;
-defm V_CMPX_LG_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x085, "v_cmpx_lg_f16">;
-defm V_CMPX_GE_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x086, "v_cmpx_ge_f16">;
-defm V_CMPX_O_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x087, "v_cmpx_o_f16">;
-defm V_CMPX_U_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x088, "v_cmpx_u_f16">;
-defm V_CMPX_NGE_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x089, "v_cmpx_nge_f16">;
-defm V_CMPX_NLG_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x08a, "v_cmpx_nlg_f16">;
-defm V_CMPX_NGT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x08b, "v_cmpx_ngt_f16">;
-defm V_CMPX_NLE_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x08c, "v_cmpx_nle_f16">;
-defm V_CMPX_NEQ_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x08d, "v_cmpx_neq_f16">;
-defm V_CMPX_NLT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x08e, "v_cmpx_nlt_f16">;
-defm V_CMPX_T_F16_fake16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16_fake16", "v_cmpx_t_f16", "v_cmpx_tru_f16">;
+defm V_CMPX_EQ_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
+defm V_CMPX_LE_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x083, "v_cmpx_le_f16">;
+defm V_CMPX_GT_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x084, "v_cmpx_gt_f16">;
+defm V_CMPX_LG_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x085, "v_cmpx_lg_f16">;
+defm V_CMPX_GE_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x086, "v_cmpx_ge_f16">;
+defm V_CMPX_O_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x087, "v_cmpx_o_f16">;
+defm V_CMPX_U_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x088, "v_cmpx_u_f16">;
+defm V_CMPX_NGE_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x089, "v_cmpx_nge_f16">;
+defm V_CMPX_NLG_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x08a, "v_cmpx_nlg_f16">;
+defm V_CMPX_NGT_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x08b, "v_cmpx_ngt_f16">;
+defm V_CMPX_NLE_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x08c, "v_cmpx_nle_f16">;
+defm V_CMPX_NEQ_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x08d, "v_cmpx_neq_f16">;
+defm V_CMPX_NLT_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x08e, "v_cmpx_nlt_f16">;
+defm V_CMPX_T_F16 : VOPCX_Real_t16_and_fake16_gfx11<0x08f, "v_cmpx_t_f16", "V_CMPX_TRU_F16", "v_cmpx_tru_f16">;
defm V_CMPX_F_F32 : VOPCX_Real_gfx11<0x090>;
defm V_CMPX_LT_F32 : VOPCX_Real_gfx11_gfx12<0x091>;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
index 60ec94446235ed..15ddb6374821c7 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
@@ -339,47 +339,56 @@ v_cmpx_eq_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_cmpx_eq_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_cmpx_eq_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_cmpx_f_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_mirror
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_mirror
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_half_mirror
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_half_mirror
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_shl:1
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shl:1
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_shl:15
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shl:15
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_shr:1
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shr:1
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_shr:15
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shr:15
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_ror:1
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_ror:1
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_ror:15
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_ror:15
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_cmpx_f_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_cmpx_f_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x80,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmpx_f_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_f_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x80,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-v_cmpx_f_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_cmpx_f_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x80,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmpx_f_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_f_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x80,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
-v_cmpx_f_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmpx_f_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x80,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmpx_f_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_f_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x80,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_f_f16_e64_dpp |v1.h|, -v2.h row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_f_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x19,0x80,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_f_f16_e64_dpp -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_f_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x0a,0x80,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_f_f16_e64_dpp -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_f_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x93,0x80,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
v_cmpx_f_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmpx_f_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x90,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -507,47 +516,56 @@ v_cmpx_f_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_cmpx_f_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_cmpx_f_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc8,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_mirror
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_half_mirror
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shl:1
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_mirror
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shl:15
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_half_mirror
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shr:1
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:1
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shr:15
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:15
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_ror:1
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:1
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_ror:15
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:15
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:1
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_ge_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x86,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:15
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_ge_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x86,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_ge_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x86,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
-v_cmpx_ge_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_cmpx_ge_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x86,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmpx_ge_f16_e64_dpp |v1.h|, -v2.h row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_ge_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x19,0x86,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-v_cmpx_ge_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_cmpx_ge_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x86,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmpx_ge_f16_e64_dpp -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_ge_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x0a,0x86,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
-v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x86,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmpx_ge_f16_e64_dpp -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_ge_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x93,0x86,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -759,47 +777,56 @@ v_cmpx_ge_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_cmpx_ge_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_cmpx_ge_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmpx_gt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_gt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmpx_gt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cmpx_gt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_cmpx_gt_f16_e64_dpp v1, v2 row_mirror
-// GFX11: v_cmpx_gt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmpx_gt_f16_e64_dpp v1.l, v2.l row_mirror
+// GFX11: v_cmpx_gt_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_cmpx_gt_f16_e64_dpp v1, v2 row_half_mirror
-// GFX11: v_cmpx_gt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmpx_gt_f16_e64_dpp v1.l, v2.l row_half_mirror
+// GFX11: v_cmpx_gt_f16_e64_dpp...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/123419
More information about the llvm-commits
mailing list