[llvm] [AMDGPU][True16][CodeGen] update more GFX11Plus codegen test with true16 mode (PR #138600)
Joe Nash via llvm-commits
llvm-commits at lists.llvm.org
Mon May 12 07:33:50 PDT 2025
================
@@ -1394,133 +1395,169 @@ define <44 x i16> @bitcast_v22i32_to_v44i16(<22 x i32> %a, i32 %b) {
; GFX9-NEXT: v_perm_b32 v21, v22, v21, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: bitcast_v22i32_to_v44i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v22
-; GFX11-NEXT: ; implicit-def: $vgpr51
-; GFX11-NEXT: ; implicit-def: $vgpr50
-; GFX11-NEXT: ; implicit-def: $vgpr49
-; GFX11-NEXT: ; implicit-def: $vgpr48
-; GFX11-NEXT: ; implicit-def: $vgpr39
-; GFX11-NEXT: ; implicit-def: $vgpr38
-; GFX11-NEXT: ; implicit-def: $vgpr37
-; GFX11-NEXT: ; implicit-def: $vgpr36
-; GFX11-NEXT: ; implicit-def: $vgpr35
-; GFX11-NEXT: ; implicit-def: $vgpr34
-; GFX11-NEXT: ; implicit-def: $vgpr33
-; GFX11-NEXT: ; implicit-def: $vgpr32
-; GFX11-NEXT: ; implicit-def: $vgpr31
-; GFX11-NEXT: ; implicit-def: $vgpr30
-; GFX11-NEXT: ; implicit-def: $vgpr29
-; GFX11-NEXT: ; implicit-def: $vgpr28
-; GFX11-NEXT: ; implicit-def: $vgpr27
-; GFX11-NEXT: ; implicit-def: $vgpr26
-; GFX11-NEXT: ; implicit-def: $vgpr25
-; GFX11-NEXT: ; implicit-def: $vgpr24
-; GFX11-NEXT: ; implicit-def: $vgpr23
-; GFX11-NEXT: ; implicit-def: $vgpr22
-; GFX11-NEXT: s_and_saveexec_b32 s0, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX11-NEXT: s_cbranch_execz .LBB6_2
-; GFX11-NEXT: ; %bb.1: ; %cmp.false
-; GFX11-NEXT: v_lshrrev_b32_e32 v22, 16, v21
-; GFX11-NEXT: v_lshrrev_b32_e32 v23, 16, v20
-; GFX11-NEXT: v_lshrrev_b32_e32 v24, 16, v19
-; GFX11-NEXT: v_lshrrev_b32_e32 v25, 16, v18
-; GFX11-NEXT: v_lshrrev_b32_e32 v26, 16, v17
-; GFX11-NEXT: v_lshrrev_b32_e32 v27, 16, v16
-; GFX11-NEXT: v_lshrrev_b32_e32 v28, 16, v15
-; GFX11-NEXT: v_lshrrev_b32_e32 v29, 16, v14
-; GFX11-NEXT: v_lshrrev_b32_e32 v30, 16, v13
-; GFX11-NEXT: v_lshrrev_b32_e32 v31, 16, v12
-; GFX11-NEXT: v_lshrrev_b32_e32 v32, 16, v11
-; GFX11-NEXT: v_lshrrev_b32_e32 v33, 16, v10
-; GFX11-NEXT: v_lshrrev_b32_e32 v34, 16, v9
-; GFX11-NEXT: v_lshrrev_b32_e32 v35, 16, v8
-; GFX11-NEXT: v_lshrrev_b32_e32 v36, 16, v7
-; GFX11-NEXT: v_lshrrev_b32_e32 v37, 16, v6
-; GFX11-NEXT: v_lshrrev_b32_e32 v38, 16, v5
-; GFX11-NEXT: v_lshrrev_b32_e32 v39, 16, v4
-; GFX11-NEXT: v_lshrrev_b32_e32 v48, 16, v3
-; GFX11-NEXT: v_lshrrev_b32_e32 v49, 16, v2
-; GFX11-NEXT: v_lshrrev_b32_e32 v50, 16, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v51, 16, v0
-; GFX11-NEXT: .LBB6_2: ; %Flow
-; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0
-; GFX11-NEXT: s_cbranch_execz .LBB6_4
-; GFX11-NEXT: ; %bb.3: ; %cmp.true
-; GFX11-NEXT: v_add_nc_u32_e32 v21, 3, v21
-; GFX11-NEXT: v_add_nc_u32_e32 v20, 3, v20
-; GFX11-NEXT: v_add_nc_u32_e32 v19, 3, v19
-; GFX11-NEXT: v_add_nc_u32_e32 v18, 3, v18
-; GFX11-NEXT: v_add_nc_u32_e32 v17, 3, v17
-; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v16
-; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v15
-; GFX11-NEXT: v_add_nc_u32_e32 v14, 3, v14
-; GFX11-NEXT: v_add_nc_u32_e32 v13, 3, v13
-; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v12
-; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v11
-; GFX11-NEXT: v_add_nc_u32_e32 v10, 3, v10
-; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v9
-; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v8
-; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v7
-; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v6
-; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v5
-; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v4
-; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v3
-; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v2
-; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v1
-; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v0
-; GFX11-NEXT: v_lshrrev_b32_e32 v22, 16, v21
-; GFX11-NEXT: v_lshrrev_b32_e32 v23, 16, v20
-; GFX11-NEXT: v_lshrrev_b32_e32 v24, 16, v19
-; GFX11-NEXT: v_lshrrev_b32_e32 v25, 16, v18
-; GFX11-NEXT: v_lshrrev_b32_e32 v26, 16, v17
-; GFX11-NEXT: v_lshrrev_b32_e32 v27, 16, v16
-; GFX11-NEXT: v_lshrrev_b32_e32 v28, 16, v15
-; GFX11-NEXT: v_lshrrev_b32_e32 v29, 16, v14
-; GFX11-NEXT: v_lshrrev_b32_e32 v30, 16, v13
-; GFX11-NEXT: v_lshrrev_b32_e32 v31, 16, v12
-; GFX11-NEXT: v_lshrrev_b32_e32 v32, 16, v11
-; GFX11-NEXT: v_lshrrev_b32_e32 v33, 16, v10
-; GFX11-NEXT: v_lshrrev_b32_e32 v34, 16, v9
-; GFX11-NEXT: v_lshrrev_b32_e32 v35, 16, v8
-; GFX11-NEXT: v_lshrrev_b32_e32 v36, 16, v7
-; GFX11-NEXT: v_lshrrev_b32_e32 v37, 16, v6
-; GFX11-NEXT: v_lshrrev_b32_e32 v38, 16, v5
-; GFX11-NEXT: v_lshrrev_b32_e32 v39, 16, v4
-; GFX11-NEXT: v_lshrrev_b32_e32 v48, 16, v3
-; GFX11-NEXT: v_lshrrev_b32_e32 v49, 16, v2
-; GFX11-NEXT: v_lshrrev_b32_e32 v50, 16, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v51, 16, v0
-; GFX11-NEXT: .LBB6_4: ; %end
-; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_perm_b32 v0, v51, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v1, v50, v1, 0x5040100
-; GFX11-NEXT: v_perm_b32 v2, v49, v2, 0x5040100
-; GFX11-NEXT: v_perm_b32 v3, v48, v3, 0x5040100
-; GFX11-NEXT: v_perm_b32 v4, v39, v4, 0x5040100
-; GFX11-NEXT: v_perm_b32 v5, v38, v5, 0x5040100
-; GFX11-NEXT: v_perm_b32 v6, v37, v6, 0x5040100
-; GFX11-NEXT: v_perm_b32 v7, v36, v7, 0x5040100
-; GFX11-NEXT: v_perm_b32 v8, v35, v8, 0x5040100
-; GFX11-NEXT: v_perm_b32 v9, v34, v9, 0x5040100
-; GFX11-NEXT: v_perm_b32 v10, v33, v10, 0x5040100
-; GFX11-NEXT: v_perm_b32 v11, v32, v11, 0x5040100
-; GFX11-NEXT: v_perm_b32 v12, v31, v12, 0x5040100
-; GFX11-NEXT: v_perm_b32 v13, v30, v13, 0x5040100
-; GFX11-NEXT: v_perm_b32 v14, v29, v14, 0x5040100
-; GFX11-NEXT: v_perm_b32 v15, v28, v15, 0x5040100
-; GFX11-NEXT: v_perm_b32 v16, v27, v16, 0x5040100
-; GFX11-NEXT: v_perm_b32 v17, v26, v17, 0x5040100
-; GFX11-NEXT: v_perm_b32 v18, v25, v18, 0x5040100
-; GFX11-NEXT: v_perm_b32 v19, v24, v19, 0x5040100
-; GFX11-NEXT: v_perm_b32 v20, v23, v20, 0x5040100
-; GFX11-NEXT: v_perm_b32 v21, v22, v21, 0x5040100
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: bitcast_v22i32_to_v44i16:
----------------
Sisyph wrote:
@Shoreshen Does this ISA for the True16 workflow make sense? If so, it looks like a substantial improvement.
https://github.com/llvm/llvm-project/pull/138600
More information about the llvm-commits mailing list