[llvm] [AMDGPU][True16][CodeGen] update more GFX11Plus codegen test with true16 mode (PR #138600)

via llvm-commits llvm-commits at lists.llvm.org
Mon May 5 15:11:05 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

<details>
<summary>Changes</summary>

This is an NFC patch.

This patch duplicates the GFX11Plus RUN lines and applies them with "-mattr=+real-true16" and "-mattr=-real-true16" on more gfx11/gfx12 tests, and then updates the tests with the update script.

---

Patch is 6.60 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138600.diff


27 Files Affected:

- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll (+2124-1573) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll (+2300-1717) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll (+2480-1863) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll (+2656-2007) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll (+2836-2153) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll (+3012-2297) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll (+3192-2443) 
- (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll (+2391-1190) 
- (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll (+2280-1125) 
- (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll (+2280-1125) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll (+4768-2353) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll (+5362-2643) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll (+5362-2643) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll (+5222-2577) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll (+741-338) 
- (modified) llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll (+4950-2446) 
- (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll (+5314-2604) 
- (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmin.ll (+5314-2604) 
- (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fsub.ll (+5196-2560) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.waitcnt.out.order.ll (+128-57) 
- (modified) llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll (+2368-1172) 
- (modified) llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmax.ll (+2790-1368) 
- (modified) llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmin.ll (+2790-1368) 
- (modified) llvm/test/CodeGen/AMDGPU/local-atomicrmw-fsub.ll (+2763-1357) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-vgpr-to-sgpr-return.ll (+19-9) 
- (modified) llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll (+380-162) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll
index 1ef7d358d8cae..8ae7b58330256 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll
@@ -3,7 +3,8 @@
 ; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
 
 define <18 x float> @bitcast_v18i32_to_v18f32(<18 x i32> %a, i32 %b) {
 ; GCN-LABEL: bitcast_v18i32_to_v18f32:
@@ -1227,113 +1228,145 @@ define <36 x i16> @bitcast_v18i32_to_v36i16(<18 x i32> %a, i32 %b) {
 ; GFX9-NEXT:    v_perm_b32 v17, v18, v17, s4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: bitcast_v18i32_to_v36i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v18
-; GFX11-NEXT:    ; implicit-def: $vgpr35
-; GFX11-NEXT:    ; implicit-def: $vgpr34
-; GFX11-NEXT:    ; implicit-def: $vgpr33
-; GFX11-NEXT:    ; implicit-def: $vgpr32
-; GFX11-NEXT:    ; implicit-def: $vgpr31
-; GFX11-NEXT:    ; implicit-def: $vgpr30
-; GFX11-NEXT:    ; implicit-def: $vgpr29
-; GFX11-NEXT:    ; implicit-def: $vgpr28
-; GFX11-NEXT:    ; implicit-def: $vgpr27
-; GFX11-NEXT:    ; implicit-def: $vgpr26
-; GFX11-NEXT:    ; implicit-def: $vgpr25
-; GFX11-NEXT:    ; implicit-def: $vgpr24
-; GFX11-NEXT:    ; implicit-def: $vgpr23
-; GFX11-NEXT:    ; implicit-def: $vgpr22
-; GFX11-NEXT:    ; implicit-def: $vgpr21
-; GFX11-NEXT:    ; implicit-def: $vgpr20
-; GFX11-NEXT:    ; implicit-def: $vgpr19
-; GFX11-NEXT:    ; implicit-def: $vgpr18
-; GFX11-NEXT:    s_and_saveexec_b32 s0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX11-NEXT:    s_cbranch_execz .LBB6_2
-; GFX11-NEXT:  ; %bb.1: ; %cmp.false
-; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v17
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v16
-; GFX11-NEXT:    v_lshrrev_b32_e32 v20, 16, v15
-; GFX11-NEXT:    v_lshrrev_b32_e32 v21, 16, v14
-; GFX11-NEXT:    v_lshrrev_b32_e32 v22, 16, v13
-; GFX11-NEXT:    v_lshrrev_b32_e32 v23, 16, v12
-; GFX11-NEXT:    v_lshrrev_b32_e32 v24, 16, v11
-; GFX11-NEXT:    v_lshrrev_b32_e32 v25, 16, v10
-; GFX11-NEXT:    v_lshrrev_b32_e32 v26, 16, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v27, 16, v8
-; GFX11-NEXT:    v_lshrrev_b32_e32 v28, 16, v7
-; GFX11-NEXT:    v_lshrrev_b32_e32 v29, 16, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v30, 16, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v31, 16, v4
-; GFX11-NEXT:    v_lshrrev_b32_e32 v32, 16, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v33, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v34, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v35, 16, v0
-; GFX11-NEXT:  .LBB6_2: ; %Flow
-; GFX11-NEXT:    s_and_not1_saveexec_b32 s0, s0
-; GFX11-NEXT:    s_cbranch_execz .LBB6_4
-; GFX11-NEXT:  ; %bb.3: ; %cmp.true
-; GFX11-NEXT:    v_add_nc_u32_e32 v17, 3, v17
-; GFX11-NEXT:    v_add_nc_u32_e32 v16, 3, v16
-; GFX11-NEXT:    v_add_nc_u32_e32 v15, 3, v15
-; GFX11-NEXT:    v_add_nc_u32_e32 v14, 3, v14
-; GFX11-NEXT:    v_add_nc_u32_e32 v13, 3, v13
-; GFX11-NEXT:    v_add_nc_u32_e32 v12, 3, v12
-; GFX11-NEXT:    v_add_nc_u32_e32 v11, 3, v11
-; GFX11-NEXT:    v_add_nc_u32_e32 v10, 3, v10
-; GFX11-NEXT:    v_add_nc_u32_e32 v9, 3, v9
-; GFX11-NEXT:    v_add_nc_u32_e32 v8, 3, v8
-; GFX11-NEXT:    v_add_nc_u32_e32 v7, 3, v7
-; GFX11-NEXT:    v_add_nc_u32_e32 v6, 3, v6
-; GFX11-NEXT:    v_add_nc_u32_e32 v5, 3, v5
-; GFX11-NEXT:    v_add_nc_u32_e32 v4, 3, v4
-; GFX11-NEXT:    v_add_nc_u32_e32 v3, 3, v3
-; GFX11-NEXT:    v_add_nc_u32_e32 v2, 3, v2
-; GFX11-NEXT:    v_add_nc_u32_e32 v1, 3, v1
-; GFX11-NEXT:    v_add_nc_u32_e32 v0, 3, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v17
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v16
-; GFX11-NEXT:    v_lshrrev_b32_e32 v20, 16, v15
-; GFX11-NEXT:    v_lshrrev_b32_e32 v21, 16, v14
-; GFX11-NEXT:    v_lshrrev_b32_e32 v22, 16, v13
-; GFX11-NEXT:    v_lshrrev_b32_e32 v23, 16, v12
-; GFX11-NEXT:    v_lshrrev_b32_e32 v24, 16, v11
-; GFX11-NEXT:    v_lshrrev_b32_e32 v25, 16, v10
-; GFX11-NEXT:    v_lshrrev_b32_e32 v26, 16, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v27, 16, v8
-; GFX11-NEXT:    v_lshrrev_b32_e32 v28, 16, v7
-; GFX11-NEXT:    v_lshrrev_b32_e32 v29, 16, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v30, 16, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v31, 16, v4
-; GFX11-NEXT:    v_lshrrev_b32_e32 v32, 16, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v33, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v34, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v35, 16, v0
-; GFX11-NEXT:  .LBB6_4: ; %end
-; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_perm_b32 v0, v35, v0, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v1, v34, v1, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v2, v33, v2, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v3, v32, v3, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v4, v31, v4, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v5, v30, v5, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v6, v29, v6, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v7, v28, v7, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v8, v27, v8, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v9, v26, v9, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v10, v25, v10, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v11, v24, v11, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v12, v23, v12, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v13, v22, v13, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v14, v21, v14, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v15, v20, v15, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v16, v19, v16, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v17, v18, v17, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: bitcast_v18i32_to_v36i16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s0, exec_lo
+; GFX11-TRUE16-NEXT:    v_cmpx_ne_u32_e32 0, v18
+; GFX11-TRUE16-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT:    s_and_not1_saveexec_b32 s0, s0
+; GFX11-TRUE16-NEXT:    s_cbranch_execz .LBB6_2
+; GFX11-TRUE16-NEXT:  ; %bb.1: ; %cmp.true
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v17, 3, v17
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v16, 3, v16
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v15, 3, v15
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v14, 3, v14
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v13, 3, v13
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v12, 3, v12
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v11, 3, v11
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v10, 3, v10
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v9, 3, v9
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v8, 3, v8
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v7, 3, v7
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v6, 3, v6
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v5, 3, v5
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v4, 3, v4
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v3, 3, v3
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v2, 3, v2
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v1, 3, v1
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v0, 3, v0
+; GFX11-TRUE16-NEXT:  .LBB6_2: ; %end
+; GFX11-TRUE16-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: bitcast_v18i32_to_v36i16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v18
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr35
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr34
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr33
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr32
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr31
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr30
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr29
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr28
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr27
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr26
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr25
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr24
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr23
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr22
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr21
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr20
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr19
+; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr18
+; GFX11-FAKE16-NEXT:    s_and_saveexec_b32 s0, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-FAKE16-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX11-FAKE16-NEXT:    s_cbranch_execz .LBB6_2
+; GFX11-FAKE16-NEXT:  ; %bb.1: ; %cmp.false
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v17
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v16
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v15
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v21, 16, v14
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v22, 16, v13
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v23, 16, v12
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v24, 16, v11
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v25, 16, v10
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v26, 16, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v27, 16, v8
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v28, 16, v7
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v29, 16, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v30, 16, v5
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v31, 16, v4
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v32, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v33, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v34, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v35, 16, v0
+; GFX11-FAKE16-NEXT:  .LBB6_2: ; %Flow
+; GFX11-FAKE16-NEXT:    s_and_not1_saveexec_b32 s0, s0
+; GFX11-FAKE16-NEXT:    s_cbranch_execz .LBB6_4
+; GFX11-FAKE16-NEXT:  ; %bb.3: ; %cmp.true
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v17, 3, v17
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v16, 3, v16
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v15, 3, v15
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v14, 3, v14
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v13, 3, v13
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v12, 3, v12
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v11, 3, v11
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v10, 3, v10
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v9, 3, v9
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v8, 3, v8
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v7, 3, v7
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v6, 3, v6
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v5, 3, v5
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v4, 3, v4
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v3, 3, v3
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v2, 3, v2
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v1, 3, v1
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v0, 3, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v17
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v16
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v15
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v21, 16, v14
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v22, 16, v13
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v23, 16, v12
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v24, 16, v11
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v25, 16, v10
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v26, 16, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v27, 16, v8
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v28, 16, v7
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v29, 16, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v30, 16, v5
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v31, 16, v4
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v32, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v33, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v34, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v35, 16, v0
+; GFX11-FAKE16-NEXT:  .LBB6_4: ; %end
+; GFX11-FAKE16-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v35, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v34, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v33, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v32, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v31, v4, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v5, v30, v5, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v6, v29, v6, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v7, v28, v7, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v8, v27, v8, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v9, v26, v9, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v10, v25, v10, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v11, v24, v11, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v12, v23, v12, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v13, v22, v13, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v14, v21, v14, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v15, v20, v15, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v16, v19, v16, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v17, v18, v17, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %b, 0
   br i1 %cmp, label %cmp.true, label %cmp.false
 
@@ -1963,73 +1996,105 @@ define <18 x i32> @bitcast_v36i16_to_v18i32(<36 x i16> %a, i32 %b) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: bitcast_v36i16_to_v18i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v17
-; GFX11-NEXT:    v_lshrrev_b32_e32 v20, 16, v16
-; GFX11-NEXT:    v_lshrrev_b32_e32 v21, 16, v15
-; GFX11-NEXT:    v_lshrrev_b32_e32 v22, 16, v14
-; GFX11-NEXT:    v_lshrrev_b32_e32 v23, 16, v13
-; GFX11-NEXT:    v_lshrrev_b32_e32 v24, 16, v12
-; GFX11-NEXT:    v_lshrrev_b32_e32 v25, 16, v11
-; GFX11-NEXT:    v_lshrrev_b32_e32 v26, 16, v10
-; GFX11-NEXT:    v_lshrrev_b32_e32 v27, 16, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v28, 16, v8
-; GFX11-NEXT:    v_lshrrev_b32_e32 v29, 16, v7
-; GFX11-NEXT:    v_lshrrev_b32_e32 v30, 16, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v31, 16, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v32, 16, v4
-; GFX11-NEXT:    v_lshrrev_b32_e32 v33, 16, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v34, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v35, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v36, 16, v3
-; GFX11-NEXT:    v_perm_b32 v4, v32, v4, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v0, v33, v0, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v1, v34, v1, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v2, v35, v2, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v3, v36, v3, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v5, v31, v5, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v6, v30, v6, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v7, v29, v7, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v8, v28, v8, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v9, v27, v9, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v10, v26, v10, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v11, v25, v11, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v12, v24, v12, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v13, v23, v13, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v14, v22, v14, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v15, v21, v15, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v16, v20, v16, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v17, v19, v17, 0x5040100
-; GFX11-NEXT:    s_mov_b32 s0, exec_lo
-; GFX11-NEXT:    v_cmpx_ne_u32_e32 0, v18
-; GFX11-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT:    s_and_not1_saveexec_b32 s0, s0
-; GFX11-NEXT:    s_cbranch_execz .LBB7_2
-; GFX11-NEXT:  ; %bb.1: ; %cmp.true
-; GFX11-NEXT:    v_pk_add_u16 v0, v0, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v1, v1, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v2, v2, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v3, v3, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v4, v4, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v5, v5, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v6, v6, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v7, v7, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v8, v8, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v9, v9, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v10, v10, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v11, v11, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v12, v12, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v13, v13, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v14, v14, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v15, v15, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0]
-; GFX11-NEXT:  .LBB7_2: ; %end
-; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: bitcast_v36i16_to_v18i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s0, exec_lo
+; GFX11-TRUE16-NEXT:    v_cmpx_ne_u32_e32 0, v18
+; GFX11-TRUE16-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT:    s_and_not1_saveexec_b32 s0, s0
+; GFX11-TRUE16-NEXT:    s_cbranch_execz .LBB7_2
+; GFX11-TRUE16-NEXT:  ; %bb.1: ; %cmp.true
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v0, v0, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v1, v1, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v2, v2, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v3, v3, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v4, v4, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v5, v5, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v6, v6, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v7, v7, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v8, v8, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v9, v9, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v10, v10, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v11, v11, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v12, v12, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v13, v13, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v14, v14, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v15, v15, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:  .LBB7_2: ; %end
+; GFX11-TRUE16-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: bitcast_v36i16_to_v18i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v17
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v16
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v21, 16, v15
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v22, 16, v14
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v23, 16, v13
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v24, 16, v12
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v25, 16, v11
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v26, 16, v10
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v27, 16, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v28, 16, v8
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v29, 16, v7
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v30, 16, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v31, 16, v5
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v32, 16, v4
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v33, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v34, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v35, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v36, 16, v3
+; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v32, v4, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v33, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v34, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v35, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v36, v3, 0x50401...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/138600


More information about the llvm-commits mailing list