[llvm] [AMDGPU][True16][MC] added fake16 test file for gfx12 dasm test (PR #106339)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 27 22:40:33 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

<details>
<summary>Changes</summary>

This is an NFC change that adds fake16 test files for the gfx12 dasm tests.

The test content is duplicated so that the true16 and fake16 flows each have their own test file with the same content. The naming convention follows the gfx11 dasm tests.

---

Patch is 556.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106339.diff


10 Files Affected:

- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16-fake16.txt (+2618) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt (+2-2) 
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8-fake16.txt (+386) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt (+2-2) 
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1-fake16.txt (+3313) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt (+1-1) 
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16-fake16.txt (+2521) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt (+1-1) 
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8-fake16.txt (+625) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt (+1-1) 


``````````diff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16-fake16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16-fake16.txt
new file mode 100644
index 00000000000000..7fffe438d65301
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16-fake16.txt
@@ -0,0 +1,2618 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX12 %s
+
+# GFX12: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff
+
+# GFX12: v_bfrev_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+0xfa,0x70,0x0a,0x7e,0x01,0xe4,0x00,0xff
+
+# GFX12: v_bfrev_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x40,0x01,0xff]
+0xfa,0x70,0x0a,0x7e,0x01,0x40,0x01,0xff
+
+# GFX12: v_bfrev_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x41,0x01,0xff]
+0xfa,0x70,0x0a,0x7e,0x01,0x41,0x01,0xff
+
+# GFX12: v_bfrev_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x01,0x01,0xff]
+0xfa,0x70,0x0a,0x7e,0x01,0x01,0x01,0xff
+
+# GFX12: v_bfrev_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+0xfa,0x70,0x0a,0x7e,0x01,0x0f,0x01,0xff
+
+# GFX12: v_bfrev_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x11,0x01,0xff]
+0xfa,0x70,0x0a,0x7e,0x01,0x11,0x01,0xff
+
+# GFX12: v_bfrev_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+0xfa,0x70,0x0a,0x7e,0x01,0x1f,0x01,0xff
+
+# GFX12: v_bfrev_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x21,0x01,0xff]
+0xfa,0x70,0x0a,0x7e,0x01,0x21,0x01,0xff
+
+# GFX12: v_bfrev_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+0xfa,0x70,0x0a,0x7e,0x01,0x2f,0x01,0xff
+
+# GFX12: v_bfrev_b32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x50,0x01,0xff]
+0xfa,0x70,0x0a,0x7e,0x01,0x50,0x01,0xff
+
+# GFX12: v_bfrev_b32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+0xfa,0x70,0x0a,0x7e,0x01,0x5f,0x01,0x01
+
+# GFX12: v_bfrev_b32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x60,0x01,0x13]
+0xfa,0x70,0x0a,0x7e,0x01,0x60,0x01,0x13
+
+# GFX12: v_bfrev_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x70,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
+0xfa,0x70,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+
+# GFX12: v_ceil_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0xff
+
+# GFX12: v_ceil_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+0xfa,0xb8,0x0a,0x7e,0x01,0xe4,0x00,0xff
+
+# GFX12: v_ceil_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x40,0x01,0xff]
+0xfa,0xb8,0x0a,0x7e,0x01,0x40,0x01,0xff
+
+# GFX12: v_ceil_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x41,0x01,0xff]
+0xfa,0xb8,0x0a,0x7e,0x01,0x41,0x01,0xff
+
+# GFX12: v_ceil_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x01,0x01,0xff]
+0xfa,0xb8,0x0a,0x7e,0x01,0x01,0x01,0xff
+
+# GFX12: v_ceil_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+0xfa,0xb8,0x0a,0x7e,0x01,0x0f,0x01,0xff
+
+# GFX12: v_ceil_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x11,0x01,0xff]
+0xfa,0xb8,0x0a,0x7e,0x01,0x11,0x01,0xff
+
+# GFX12: v_ceil_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+0xfa,0xb8,0x0a,0x7e,0x01,0x1f,0x01,0xff
+
+# GFX12: v_ceil_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x21,0x01,0xff]
+0xfa,0xb8,0x0a,0x7e,0x01,0x21,0x01,0xff
+
+# GFX12: v_ceil_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+0xfa,0xb8,0x0a,0x7e,0x01,0x2f,0x01,0xff
+
+# GFX12: v_ceil_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x50,0x01,0xff]
+0xfa,0xb8,0x0a,0x7e,0x01,0x50,0x01,0xff
+
+# GFX12: v_ceil_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+0xfa,0xb8,0x0a,0x7e,0x01,0x5f,0x01,0x01
+
+# GFX12: v_ceil_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x60,0x01,0x13]
+0xfa,0xb8,0x0a,0x7e,0x01,0x60,0x01,0x13
+
+# GFX12: v_ceil_f16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xb8,0xfe,0x7e,0x7f,0x6f,0x3d,0x30]
+0xfa,0xb8,0xfe,0x7e,0x7f,0x6f,0x3d,0x30
+
+# GFX12: v_ceil_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+0xfa,0x44,0x0a,0x7e,0x01,0x1b,0x00,0xff
+
+# GFX12: v_ceil_f32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+0xfa,0x44,0x0a,0x7e,0x01,0xe4,0x00,0xff
+
+# GFX12: v_ceil_f32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x40,0x01,0xff]
+0xfa,0x44,0x0a,0x7e,0x01,0x40,0x01,0xff
+
+# GFX12: v_ceil_f32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x41,0x01,0xff]
+0xfa,0x44,0x0a,0x7e,0x01,0x41,0x01,0xff
+
+# GFX12: v_ceil_f32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x01,0x01,0xff]
+0xfa,0x44,0x0a,0x7e,0x01,0x01,0x01,0xff
+
+# GFX12: v_ceil_f32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+0xfa,0x44,0x0a,0x7e,0x01,0x0f,0x01,0xff
+
+# GFX12: v_ceil_f32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x11,0x01,0xff]
+0xfa,0x44,0x0a,0x7e,0x01,0x11,0x01,0xff
+
+# GFX12: v_ceil_f32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+0xfa,0x44,0x0a,0x7e,0x01,0x1f,0x01,0xff
+
+# GFX12: v_ceil_f32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x21,0x01,0xff]
+0xfa,0x44,0x0a,0x7e,0x01,0x21,0x01,0xff
+
+# GFX12: v_ceil_f32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+0xfa,0x44,0x0a,0x7e,0x01,0x2f,0x01,0xff
+
+# GFX12: v_ceil_f32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x50,0x01,0xff]
+0xfa,0x44,0x0a,0x7e,0x01,0x50,0x01,0xff
+
+# GFX12: v_ceil_f32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+0xfa,0x44,0x0a,0x7e,0x01,0x5f,0x01,0x01
+
+# GFX12: v_ceil_f32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x60,0x01,0x13]
+0xfa,0x44,0x0a,0x7e,0x01,0x60,0x01,0x13
+
+# GFX12: v_ceil_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x44,0xfe,0x7f,0xff,0x6f,0x3d,0x30]
+0xfa,0x44,0xfe,0x7f,0xff,0x6f,0x3d,0x30
+
+# GFX12: v_cls_i32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+0xfa,0x76,0x0a,0x7e,0x01,0x1b,0x00,0xff
+
+# GFX12: v_cls_i32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+0xfa,0x76,0x0a,0x7e,0x01,0xe4,0x00,0xff
+
+# GFX12: v_cls_i32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x40,0x01,0xff]
+0xfa,0x76,0x0a,0x7e,0x01,0x40,0x01,0xff
+
+# GFX12: v_cls_i32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x41,0x01,0xff]
+0xfa,0x76,0x0a,0x7e,0x01,0x41,0x01,0xff
+
+# GFX12: v_cls_i32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x01,0x01,0xff]
+0xfa,0x76,0x0a,0x7e,0x01,0x01,0x01,0xff
+
+# GFX12: v_cls_i32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+0xfa,0x76,0x0a,0x7e,0x01,0x0f,0x01,0xff
+
+# GFX12: v_cls_i32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x11,0x01,0xff]
+0xfa,0x76,0x0a,0x7e,0x01,0x11,0x01,0xff
+
+# GFX12: v_cls_i32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+0xfa,0x76,0x0a,0x7e,0x01,0x1f,0x01,0xff
+
+# GFX12: v_cls_i32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x21,0x01,0xff]
+0xfa,0x76,0x0a,0x7e,0x01,0x21,0x01,0xff
+
+# GFX12: v_cls_i32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+0xfa,0x76,0x0a,0x7e,0x01,0x2f,0x01,0xff
+
+# GFX12: v_cls_i32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x50,0x01,0xff]
+0xfa,0x76,0x0a,0x7e,0x01,0x50,0x01,0xff
+
+# GFX12: v_cls_i32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+0xfa,0x76,0x0a,0x7e,0x01,0x5f,0x01,0x01
+
+# GFX12: v_cls_i32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x60,0x01,0x13]
+0xfa,0x76,0x0a,0x7e,0x01,0x60,0x01,0x13
+
+# GFX12: v_cls_i32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x76,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
+0xfa,0x76,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+
+# GFX12: v_clz_i32_u32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+0xfa,0x72,0x0a,0x7e,0x01,0x1b,0x00,0xff
+
+# GFX12: v_clz_i32_u32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+0xfa,0x72,0x0a,0x7e,0x01,0xe4,0x00,0xff
+
+# GFX12: v_clz_i32_u32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x40,0x01,0xff]
+0xfa,0x72,0x0a,0x7e,0x01,0x40,0x01,0xff
+
+# GFX12: v_clz_i32_u32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x41,0x01,0xff]
+0xfa,0x72,0x0a,0x7e,0x01,0x41,0x01,0xff
+
+# GFX12: v_clz_i32_u32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x01,0x01,0xff]
+0xfa,0x72,0x0a,0x7e,0x01,0x01,0x01,0xff
+
+# GFX12: v_clz_i32_u32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+0xfa,0x72,0x0a,0x7e,0x01,0x0f,0x01,0xff
+
+# GFX12: v_clz_i32_u32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x11,0x01,0xff]
+0xfa,0x72,0x0a,0x7e,0x01,0x11,0x01,0xff
+
+# GFX12: v_clz_i32_u32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+0xfa,0x72,0x0a,0x7e,0x01,0x1f,0x01,0xff
+
+# GFX12: v_clz_i32_u32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x21,0x01,0xff]
+0xfa,0x72,0x0a,0x7e,0x01,0x21,0x01,0xff
+
+# GFX12: v_clz_i32_u32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+0xfa,0x72,0x0a,0x7e,0x01,0x2f,0x01,0xff
+
+# GFX12: v_clz_i32_u32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x50,0x01,0xff]
+0xfa,0x72,0x0a,0x7e,0x01,0x50,0x01,0xff
+
+# GFX12: v_clz_i32_u32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+0xfa,0x72,0x0a,0x7e,0x01,0x5f,0x01,0x01
+
+# GFX12: v_clz_i32_u32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x60,0x01,0x13]
+0xfa,0x72,0x0a,0x7e,0x01,0x60,0x01,0x13
+
+# GFX12: v_clz_i32_u32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x72,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
+0xfa,0x72,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+
+# GFX12: v_cos_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+0xfa,0xc2,0x0a,0x7e,0x01,0x1b,0x00,0xff
+
+# GFX12: v_cos_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+0xfa,0xc2,0x0a,0x7e,0x01,0xe4,0x00,0xff
+
+# GFX12: v_cos_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x40,0x01,0xff]
+0xfa,0xc2,0x0a,0x7e,0x01,0x40,0x01,0xff
+
+# GFX12: v_cos_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x41,0x01,0xff]
+0xfa,0xc2,0x0a,0x7e,0x01,0x41,0x01,0xff
+
+# GFX12: v_cos_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x01,0x01,0xff]
+0xfa,0xc2,0x0a,0x7e,0x01,0x01,0x01,0xff
+
+# GFX12: v_cos_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+0xfa,0xc2,0x0a,0x7e,0x01,0x0f,0x01,0xff
+
+# GFX12: v_cos_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x11,0x01,0xff]
+0xfa,0xc2,0x0a,0x7e,0x01,0x11,0x01,0xff
+
+# GFX12: v_cos_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+0xfa,0xc2,0x0a,0x7e,0x01,0x1f,0x01,0xff
+
+# GFX12: v_cos_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x21,0x01,0xff]
+0xfa,0xc2,0x0a,0x7e,0x01,0x21,0x01,0xff
+
+# GFX12: v_cos_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+0xfa,0xc2,0x0a,0x7e,0x01,0x2f,0x01,0xff
+
+# GFX12: v_cos_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x50,0x01,0xff]
+0xfa,0xc2,0x0a,0x7e,0x01,0x50,0x01,0xff
+
+# GFX12: v_cos_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+0xfa,0xc2,0x0a,0x7e,0x01,0x5f,0x01,0x01
+
+# GFX12: v_cos_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x60,0x01,0x13]
+0xfa,0xc2,0x0a,0x7e,0x01,0x60,0x01,0x13
+
+# GFX12: v_cos_f16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xc2,0xfe,0x7e,0x7f,0x6f,0x3d,0x30]
+0xfa,0xc2,0xfe,0x7e,0x7f,0x6f,0x3d,0x30
+
+# GFX12: v_cos_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+0xfa,0x6c,0x0a,0x7e,0x01,0x1b,0x00,0xff
+
+# GFX12: v_cos_f32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+0xfa,0x6c,0x0a,0x7e,0x01,0xe4,0x00,0xff
+
+# GFX12: v_cos_f32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x40,0x01,0xff]
+0xfa,0x6c,0x0a,0x7e,0x01,0x40,0x01,0xff
+
+# GFX12: v_cos_f32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x41,0x01,0xff]
+0xfa,0x6c,0x0a,0x7e,0x01,0x41,0x01,0xff
+
+# GFX12: v_cos_f32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x01,0x01,0xff]
+0xfa,0x6c,0x0a,0x7e,0x01,0x01,0x01,0xff
+
+# GFX12: v_cos_f32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+0xfa,0x6c,0x0a,0x7e,0x01,0x0f,0x01,0xff
+
+# GFX12: v_cos_f32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x11,0x01,0xff]
+0xfa,0x6c,0x0a,0x7e,0x01,0x11,0x01,0xff
+
+# GFX12: v_cos_f32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+0xfa,0x6c,0x0a,0x7e,0x01,0x1f,0x01,0xff
+
+# GFX12: v_cos_f32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x21,0x01,0xff]
+0xfa,0x6c,0x0a,0x7e,0x01,0x21,0x01,0xff
+
+# GFX12: v_cos_f32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+0xfa,0x6c,0x0a,0x7e,0x01,0x2f,0x01,0xff
+
+# GFX12: v_cos_f32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x50,0x01,0xff]
+0xfa,0x6c,0x0a,0x7e,0x01,0x50,0x01,0xff
+
+# GFX12: v_cos_f32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+0xfa,0x6c,0x0a,0x7e,0x01,0x5f,0x01,0x01
+
+# GFX12: v_cos_f32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x60,0x01,0x13]
+0xfa,0x6c,0x0a,0x7e,0x01,0x60,0x01,0x13
+
+# GFX12: v_cos_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x6c,0xfe,0x7f,0xff,0x6f,0x3d,0x30]
+0xfa,0x6c,0xfe,0x7f,0xff,0x6f,0x3d,0x30
+
+# GFX12: v_ctz_i32_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+0xfa,0x74,0x0a,0x7e,0x01,0x1b,0x00,0xff
+
+# GFX12: v_ctz_i32_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+0xfa,0x74,0x0a,0x7e,0x01,0xe4,0x00,0xff
+
+# GFX12: v_ctz_i32_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x40,0x01,0xff]
+0xfa,0x74,0x0a,0x7e,0x01,0x40,0x01,0xff
+
+# GFX12: v_ctz_i32_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x41,0x01,0xff]
+0xfa,0x74,0x0a,0x7e,0x01,0x41,0x01,0xff
+
+# GFX12: v_ctz_i32_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x01,0x01,0xff]
+0xfa,0x74,0x0a,0x7e,0x01,0x01,0x01,0xff
+
+# GFX12: v_ctz_i32_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+0xfa,0x74,0x0a,0x7e,0x01,0x0f,0x01,0xff
+
+# GFX12: v_ctz_i32_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x11,0x01,0xff]
+0xfa,0x74,0x0a,0x7e,0x01,0x11,0x01,0xff
+
+# GFX12: v_ctz_i32_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+0xfa,0x74,0x0a,0x7e,0x01,0x1f,0x01,0xff
+
+# GFX12: v_ctz_i32_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x21,0x01,0xff]
+0xfa,0x74,0x0a,0x7e,0x01,0x21,0x01,0xff
+
+# GFX12: v_ctz_i32_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+0xfa,0x74,0x0a,0x7e,0x01,0x2f,0x01,0xff
+
+# GFX12: v_ctz_i32_b32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x50,0x01,0xff]
+0xfa,0x74,0x0a,0x7e,0x01,0x50,0x01,0xff
+
+# GFX12: v_ctz_i32_b32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+0xfa,0x74,0x0a,0x7e,0x01,0x5f,0x01,0x01
+
+# GFX12: v_ctz_i32_b32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x60,0x01,0x13]
+0xfa,0x74,0x0a,0x7e,0x01,0x60,0x01,0x13
+
+# GFX12: v_ctz_i32_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x74,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
+0xfa,0x74,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+
+# GFX12: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xc ; encoding: [0xfa,0xd8,0x02,0x7e,0x03,0xe4,0x00,0xac]
+0xfa,0xd8,0x02,0x7e,0x03,0xe4,0x00,0xac
+
+# GFX12: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xe ; encoding: [0xfa,0xd8,0x02,0x7e,0x03,0x1b,0x00,0x2e]
+0xfa,0xd8,0x02,0x7e,0x03,0x1b,0x00,0x2e
+
+# GFX12: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xc ; encoding: [0xfa,0xda,0x02,0x7e,0x03,0xe4,0x00,0xac]
+0xfa,0xda,0x02,0x7e,0x03,0xe4,0x00,0xac
+
+# GFX12: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xe ; encoding: [0xfa,0xda,0x02,0x7e,0x03,0x1b,0x00,0x2e]
+0xfa,0xda,0x02,0x7e,0x03,0x1b,0x00,0x2e
+
+# GFX12: v_c...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/106339


More information about the llvm-commits mailing list