[llvm] 4d566e5 - [AMDGPU] Precommit lit test.

Christudasan Devadasan via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 18 20:02:22 PST 2024


Author: Christudasan Devadasan
Date: 2024-01-19T09:32:03+05:30
New Revision: 4d566e57a2403b32992cbdae133fb644866f0070

URL: https://github.com/llvm/llvm-project/commit/4d566e57a2403b32992cbdae133fb644866f0070
DIFF: https://github.com/llvm/llvm-project/commit/4d566e57a2403b32992cbdae133fb644866f0070.diff

LOG: [AMDGPU] Precommit lit test.

Added: 
    llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll b/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll
new file mode 100644
index 00000000000000..f3276719ac13ca
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll
@@ -0,0 +1,545 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX900 %s
+; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX906 %s
+; RUN: not --crash llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx908 -O0 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GFX908-ERR %s
+; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx90a -O0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX90a %s
+
+; This test crashes on gfx908 while allocating the tuple. Unlike the other subtargets,
+; gfx908 reserves an extra VGPR for AGPR-to-VGPR copies, which increases register pressure.
+
+; GFX908-ERR: error: ran out of registers during register allocation
+
+define i32 @test_tuple(<16 x i64> %0) {
+; GFX900-LABEL: test_tuple:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX900-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX900-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX900-NEXT:    v_writelane_b32 v31, s36, 0
+; GFX900-NEXT:    v_writelane_b32 v31, s37, 1
+; GFX900-NEXT:    v_writelane_b32 v31, s38, 2
+; GFX900-NEXT:    v_writelane_b32 v31, s39, 3
+; GFX900-NEXT:    v_writelane_b32 v31, s40, 4
+; GFX900-NEXT:    v_writelane_b32 v31, s41, 5
+; GFX900-NEXT:    v_writelane_b32 v31, s42, 6
+; GFX900-NEXT:    v_writelane_b32 v31, s43, 7
+; GFX900-NEXT:    v_writelane_b32 v31, s44, 8
+; GFX900-NEXT:    v_writelane_b32 v31, s45, 9
+; GFX900-NEXT:    v_writelane_b32 v31, s46, 10
+; GFX900-NEXT:    v_writelane_b32 v31, s47, 11
+; GFX900-NEXT:    v_writelane_b32 v31, s48, 12
+; GFX900-NEXT:    v_writelane_b32 v31, s49, 13
+; GFX900-NEXT:    v_writelane_b32 v31, s50, 14
+; GFX900-NEXT:    v_writelane_b32 v31, s51, 15
+; GFX900-NEXT:    v_writelane_b32 v31, s52, 16
+; GFX900-NEXT:    v_writelane_b32 v31, s53, 17
+; GFX900-NEXT:    v_writelane_b32 v31, s54, 18
+; GFX900-NEXT:    v_writelane_b32 v31, s55, 19
+; GFX900-NEXT:    v_writelane_b32 v31, s56, 20
+; GFX900-NEXT:    v_writelane_b32 v31, s57, 21
+; GFX900-NEXT:    v_writelane_b32 v31, s58, 22
+; GFX900-NEXT:    v_writelane_b32 v31, s59, 23
+; GFX900-NEXT:    v_writelane_b32 v31, s60, 24
+; GFX900-NEXT:    v_writelane_b32 v31, s61, 25
+; GFX900-NEXT:    v_writelane_b32 v31, s62, 26
+; GFX900-NEXT:    v_writelane_b32 v31, s63, 27
+; GFX900-NEXT:    v_writelane_b32 v31, s64, 28
+; GFX900-NEXT:    v_writelane_b32 v31, s65, 29
+; GFX900-NEXT:    v_writelane_b32 v31, s66, 30
+; GFX900-NEXT:    v_writelane_b32 v31, s67, 31
+; GFX900-NEXT:    v_mov_b32_e32 v32, v0
+; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], s32
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; implicit-def: $sgpr4
+; GFX900-NEXT:    ; kill: def $vgpr32 killed $vgpr32 def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 killed $exec
+; GFX900-NEXT:    v_mov_b32_e32 v33, v1
+; GFX900-NEXT:    v_mov_b32_e32 v34, v2
+; GFX900-NEXT:    v_mov_b32_e32 v35, v3
+; GFX900-NEXT:    v_mov_b32_e32 v36, v4
+; GFX900-NEXT:    v_mov_b32_e32 v37, v5
+; GFX900-NEXT:    v_mov_b32_e32 v38, v6
+; GFX900-NEXT:    v_mov_b32_e32 v39, v7
+; GFX900-NEXT:    v_mov_b32_e32 v40, v8
+; GFX900-NEXT:    v_mov_b32_e32 v41, v9
+; GFX900-NEXT:    v_mov_b32_e32 v42, v10
+; GFX900-NEXT:    v_mov_b32_e32 v43, v11
+; GFX900-NEXT:    v_mov_b32_e32 v44, v12
+; GFX900-NEXT:    v_mov_b32_e32 v45, v13
+; GFX900-NEXT:    v_mov_b32_e32 v46, v14
+; GFX900-NEXT:    v_mov_b32_e32 v47, v15
+; GFX900-NEXT:    v_mov_b32_e32 v48, v16
+; GFX900-NEXT:    v_mov_b32_e32 v49, v17
+; GFX900-NEXT:    v_mov_b32_e32 v50, v18
+; GFX900-NEXT:    v_mov_b32_e32 v51, v19
+; GFX900-NEXT:    v_mov_b32_e32 v52, v20
+; GFX900-NEXT:    v_mov_b32_e32 v53, v21
+; GFX900-NEXT:    v_mov_b32_e32 v54, v22
+; GFX900-NEXT:    v_mov_b32_e32 v55, v23
+; GFX900-NEXT:    v_mov_b32_e32 v56, v24
+; GFX900-NEXT:    v_mov_b32_e32 v57, v25
+; GFX900-NEXT:    v_mov_b32_e32 v58, v26
+; GFX900-NEXT:    v_mov_b32_e32 v59, v27
+; GFX900-NEXT:    v_mov_b32_e32 v60, v28
+; GFX900-NEXT:    v_mov_b32_e32 v61, v29
+; GFX900-NEXT:    v_mov_b32_e32 v62, v30
+; GFX900-NEXT:    ; kill: def $vgpr63 killed $vgpr0 killed $exec
+; GFX900-NEXT:    ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+; GFX900-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-NEXT:    v_mov_b32_e32 v0, 0
+; GFX900-NEXT:    v_readlane_b32 s67, v31, 31
+; GFX900-NEXT:    v_readlane_b32 s66, v31, 30
+; GFX900-NEXT:    v_readlane_b32 s65, v31, 29
+; GFX900-NEXT:    v_readlane_b32 s64, v31, 28
+; GFX900-NEXT:    v_readlane_b32 s63, v31, 27
+; GFX900-NEXT:    v_readlane_b32 s62, v31, 26
+; GFX900-NEXT:    v_readlane_b32 s61, v31, 25
+; GFX900-NEXT:    v_readlane_b32 s60, v31, 24
+; GFX900-NEXT:    v_readlane_b32 s59, v31, 23
+; GFX900-NEXT:    v_readlane_b32 s58, v31, 22
+; GFX900-NEXT:    v_readlane_b32 s57, v31, 21
+; GFX900-NEXT:    v_readlane_b32 s56, v31, 20
+; GFX900-NEXT:    v_readlane_b32 s55, v31, 19
+; GFX900-NEXT:    v_readlane_b32 s54, v31, 18
+; GFX900-NEXT:    v_readlane_b32 s53, v31, 17
+; GFX900-NEXT:    v_readlane_b32 s52, v31, 16
+; GFX900-NEXT:    v_readlane_b32 s51, v31, 15
+; GFX900-NEXT:    v_readlane_b32 s50, v31, 14
+; GFX900-NEXT:    v_readlane_b32 s49, v31, 13
+; GFX900-NEXT:    v_readlane_b32 s48, v31, 12
+; GFX900-NEXT:    v_readlane_b32 s47, v31, 11
+; GFX900-NEXT:    v_readlane_b32 s46, v31, 10
+; GFX900-NEXT:    v_readlane_b32 s45, v31, 9
+; GFX900-NEXT:    v_readlane_b32 s44, v31, 8
+; GFX900-NEXT:    v_readlane_b32 s43, v31, 7
+; GFX900-NEXT:    v_readlane_b32 s42, v31, 6
+; GFX900-NEXT:    v_readlane_b32 s41, v31, 5
+; GFX900-NEXT:    v_readlane_b32 s40, v31, 4
+; GFX900-NEXT:    v_readlane_b32 s39, v31, 3
+; GFX900-NEXT:    v_readlane_b32 s38, v31, 2
+; GFX900-NEXT:    v_readlane_b32 s37, v31, 1
+; GFX900-NEXT:    v_readlane_b32 s36, v31, 0
+; GFX900-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX900-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX900-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: test_tuple:
+; GFX906:       ; %bb.0:
+; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX906-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX906-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX906-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX906-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX906-NEXT:    v_writelane_b32 v31, s36, 0
+; GFX906-NEXT:    v_writelane_b32 v31, s37, 1
+; GFX906-NEXT:    v_writelane_b32 v31, s38, 2
+; GFX906-NEXT:    v_writelane_b32 v31, s39, 3
+; GFX906-NEXT:    v_writelane_b32 v31, s40, 4
+; GFX906-NEXT:    v_writelane_b32 v31, s41, 5
+; GFX906-NEXT:    v_writelane_b32 v31, s42, 6
+; GFX906-NEXT:    v_writelane_b32 v31, s43, 7
+; GFX906-NEXT:    v_writelane_b32 v31, s44, 8
+; GFX906-NEXT:    v_writelane_b32 v31, s45, 9
+; GFX906-NEXT:    v_writelane_b32 v31, s46, 10
+; GFX906-NEXT:    v_writelane_b32 v31, s47, 11
+; GFX906-NEXT:    v_writelane_b32 v31, s48, 12
+; GFX906-NEXT:    v_writelane_b32 v31, s49, 13
+; GFX906-NEXT:    v_writelane_b32 v31, s50, 14
+; GFX906-NEXT:    v_writelane_b32 v31, s51, 15
+; GFX906-NEXT:    v_writelane_b32 v31, s52, 16
+; GFX906-NEXT:    v_writelane_b32 v31, s53, 17
+; GFX906-NEXT:    v_writelane_b32 v31, s54, 18
+; GFX906-NEXT:    v_writelane_b32 v31, s55, 19
+; GFX906-NEXT:    v_writelane_b32 v31, s56, 20
+; GFX906-NEXT:    v_writelane_b32 v31, s57, 21
+; GFX906-NEXT:    v_writelane_b32 v31, s58, 22
+; GFX906-NEXT:    v_writelane_b32 v31, s59, 23
+; GFX906-NEXT:    v_writelane_b32 v31, s60, 24
+; GFX906-NEXT:    v_writelane_b32 v31, s61, 25
+; GFX906-NEXT:    v_writelane_b32 v31, s62, 26
+; GFX906-NEXT:    v_writelane_b32 v31, s63, 27
+; GFX906-NEXT:    v_writelane_b32 v31, s64, 28
+; GFX906-NEXT:    v_writelane_b32 v31, s65, 29
+; GFX906-NEXT:    v_writelane_b32 v31, s66, 30
+; GFX906-NEXT:    v_writelane_b32 v31, s67, 31
+; GFX906-NEXT:    v_mov_b32_e32 v32, v0
+; GFX906-NEXT:    buffer_load_dword v0, off, s[0:3], s32
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; implicit-def: $sgpr4
+; GFX906-NEXT:    ; kill: def $vgpr32 killed $vgpr32 def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 killed $exec
+; GFX906-NEXT:    v_mov_b32_e32 v33, v1
+; GFX906-NEXT:    v_mov_b32_e32 v34, v2
+; GFX906-NEXT:    v_mov_b32_e32 v35, v3
+; GFX906-NEXT:    v_mov_b32_e32 v36, v4
+; GFX906-NEXT:    v_mov_b32_e32 v37, v5
+; GFX906-NEXT:    v_mov_b32_e32 v38, v6
+; GFX906-NEXT:    v_mov_b32_e32 v39, v7
+; GFX906-NEXT:    v_mov_b32_e32 v40, v8
+; GFX906-NEXT:    v_mov_b32_e32 v41, v9
+; GFX906-NEXT:    v_mov_b32_e32 v42, v10
+; GFX906-NEXT:    v_mov_b32_e32 v43, v11
+; GFX906-NEXT:    v_mov_b32_e32 v44, v12
+; GFX906-NEXT:    v_mov_b32_e32 v45, v13
+; GFX906-NEXT:    v_mov_b32_e32 v46, v14
+; GFX906-NEXT:    v_mov_b32_e32 v47, v15
+; GFX906-NEXT:    v_mov_b32_e32 v48, v16
+; GFX906-NEXT:    v_mov_b32_e32 v49, v17
+; GFX906-NEXT:    v_mov_b32_e32 v50, v18
+; GFX906-NEXT:    v_mov_b32_e32 v51, v19
+; GFX906-NEXT:    v_mov_b32_e32 v52, v20
+; GFX906-NEXT:    v_mov_b32_e32 v53, v21
+; GFX906-NEXT:    v_mov_b32_e32 v54, v22
+; GFX906-NEXT:    v_mov_b32_e32 v55, v23
+; GFX906-NEXT:    v_mov_b32_e32 v56, v24
+; GFX906-NEXT:    v_mov_b32_e32 v57, v25
+; GFX906-NEXT:    v_mov_b32_e32 v58, v26
+; GFX906-NEXT:    v_mov_b32_e32 v59, v27
+; GFX906-NEXT:    v_mov_b32_e32 v60, v28
+; GFX906-NEXT:    v_mov_b32_e32 v61, v29
+; GFX906-NEXT:    v_mov_b32_e32 v62, v30
+; GFX906-NEXT:    ; kill: def $vgpr63 killed $vgpr0 killed $exec
+; GFX906-NEXT:    ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+; GFX906-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-NEXT:    v_mov_b32_e32 v0, 0
+; GFX906-NEXT:    v_readlane_b32 s67, v31, 31
+; GFX906-NEXT:    v_readlane_b32 s66, v31, 30
+; GFX906-NEXT:    v_readlane_b32 s65, v31, 29
+; GFX906-NEXT:    v_readlane_b32 s64, v31, 28
+; GFX906-NEXT:    v_readlane_b32 s63, v31, 27
+; GFX906-NEXT:    v_readlane_b32 s62, v31, 26
+; GFX906-NEXT:    v_readlane_b32 s61, v31, 25
+; GFX906-NEXT:    v_readlane_b32 s60, v31, 24
+; GFX906-NEXT:    v_readlane_b32 s59, v31, 23
+; GFX906-NEXT:    v_readlane_b32 s58, v31, 22
+; GFX906-NEXT:    v_readlane_b32 s57, v31, 21
+; GFX906-NEXT:    v_readlane_b32 s56, v31, 20
+; GFX906-NEXT:    v_readlane_b32 s55, v31, 19
+; GFX906-NEXT:    v_readlane_b32 s54, v31, 18
+; GFX906-NEXT:    v_readlane_b32 s53, v31, 17
+; GFX906-NEXT:    v_readlane_b32 s52, v31, 16
+; GFX906-NEXT:    v_readlane_b32 s51, v31, 15
+; GFX906-NEXT:    v_readlane_b32 s50, v31, 14
+; GFX906-NEXT:    v_readlane_b32 s49, v31, 13
+; GFX906-NEXT:    v_readlane_b32 s48, v31, 12
+; GFX906-NEXT:    v_readlane_b32 s47, v31, 11
+; GFX906-NEXT:    v_readlane_b32 s46, v31, 10
+; GFX906-NEXT:    v_readlane_b32 s45, v31, 9
+; GFX906-NEXT:    v_readlane_b32 s44, v31, 8
+; GFX906-NEXT:    v_readlane_b32 s43, v31, 7
+; GFX906-NEXT:    v_readlane_b32 s42, v31, 6
+; GFX906-NEXT:    v_readlane_b32 s41, v31, 5
+; GFX906-NEXT:    v_readlane_b32 s40, v31, 4
+; GFX906-NEXT:    v_readlane_b32 s39, v31, 3
+; GFX906-NEXT:    v_readlane_b32 s38, v31, 2
+; GFX906-NEXT:    v_readlane_b32 s37, v31, 1
+; GFX906-NEXT:    v_readlane_b32 s36, v31, 0
+; GFX906-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX906-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX906-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX906-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX906-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX906-NEXT:    s_waitcnt vmcnt(0)
+; GFX906-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90a-LABEL: test_tuple:
+; GFX90a:       ; %bb.0:
+; GFX90a-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90a-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX90a-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX90a-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX90a-NEXT:    v_accvgpr_write_b32 a0, v40 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a1, v41 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a2, v42 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a3, v43 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a4, v44 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a5, v45 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a6, v46 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a7, v47 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a8, v56 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a9, v57 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a10, v58 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a11, v59 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a12, v60 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a13, v61 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a14, v62 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_write_b32 a15, v63 ; Reload Reuse
+; GFX90a-NEXT:    v_writelane_b32 v31, s36, 0
+; GFX90a-NEXT:    v_writelane_b32 v31, s37, 1
+; GFX90a-NEXT:    v_writelane_b32 v31, s38, 2
+; GFX90a-NEXT:    v_writelane_b32 v31, s39, 3
+; GFX90a-NEXT:    v_writelane_b32 v31, s40, 4
+; GFX90a-NEXT:    v_writelane_b32 v31, s41, 5
+; GFX90a-NEXT:    v_writelane_b32 v31, s42, 6
+; GFX90a-NEXT:    v_writelane_b32 v31, s43, 7
+; GFX90a-NEXT:    v_writelane_b32 v31, s44, 8
+; GFX90a-NEXT:    v_writelane_b32 v31, s45, 9
+; GFX90a-NEXT:    v_writelane_b32 v31, s46, 10
+; GFX90a-NEXT:    v_writelane_b32 v31, s47, 11
+; GFX90a-NEXT:    v_writelane_b32 v31, s48, 12
+; GFX90a-NEXT:    v_writelane_b32 v31, s49, 13
+; GFX90a-NEXT:    v_writelane_b32 v31, s50, 14
+; GFX90a-NEXT:    v_writelane_b32 v31, s51, 15
+; GFX90a-NEXT:    v_writelane_b32 v31, s52, 16
+; GFX90a-NEXT:    v_writelane_b32 v31, s53, 17
+; GFX90a-NEXT:    v_writelane_b32 v31, s54, 18
+; GFX90a-NEXT:    v_writelane_b32 v31, s55, 19
+; GFX90a-NEXT:    v_writelane_b32 v31, s56, 20
+; GFX90a-NEXT:    v_writelane_b32 v31, s57, 21
+; GFX90a-NEXT:    v_writelane_b32 v31, s58, 22
+; GFX90a-NEXT:    v_writelane_b32 v31, s59, 23
+; GFX90a-NEXT:    v_writelane_b32 v31, s60, 24
+; GFX90a-NEXT:    v_writelane_b32 v31, s61, 25
+; GFX90a-NEXT:    v_writelane_b32 v31, s62, 26
+; GFX90a-NEXT:    v_writelane_b32 v31, s63, 27
+; GFX90a-NEXT:    v_writelane_b32 v31, s64, 28
+; GFX90a-NEXT:    v_writelane_b32 v31, s65, 29
+; GFX90a-NEXT:    v_writelane_b32 v31, s66, 30
+; GFX90a-NEXT:    v_writelane_b32 v31, s67, 31
+; GFX90a-NEXT:    v_mov_b32_e32 v32, v0
+; GFX90a-NEXT:    buffer_load_dword v0, off, s[0:3], s32
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; implicit-def: $sgpr4
+; GFX90a-NEXT:    ; kill: def $vgpr32 killed $vgpr32 def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 killed $exec
+; GFX90a-NEXT:    v_mov_b32_e32 v33, v1
+; GFX90a-NEXT:    v_mov_b32_e32 v34, v2
+; GFX90a-NEXT:    v_mov_b32_e32 v35, v3
+; GFX90a-NEXT:    v_mov_b32_e32 v36, v4
+; GFX90a-NEXT:    v_mov_b32_e32 v37, v5
+; GFX90a-NEXT:    v_mov_b32_e32 v38, v6
+; GFX90a-NEXT:    v_mov_b32_e32 v39, v7
+; GFX90a-NEXT:    v_mov_b32_e32 v40, v8
+; GFX90a-NEXT:    v_mov_b32_e32 v41, v9
+; GFX90a-NEXT:    v_mov_b32_e32 v42, v10
+; GFX90a-NEXT:    v_mov_b32_e32 v43, v11
+; GFX90a-NEXT:    v_mov_b32_e32 v44, v12
+; GFX90a-NEXT:    v_mov_b32_e32 v45, v13
+; GFX90a-NEXT:    v_mov_b32_e32 v46, v14
+; GFX90a-NEXT:    v_mov_b32_e32 v47, v15
+; GFX90a-NEXT:    v_mov_b32_e32 v48, v16
+; GFX90a-NEXT:    v_mov_b32_e32 v49, v17
+; GFX90a-NEXT:    v_mov_b32_e32 v50, v18
+; GFX90a-NEXT:    v_mov_b32_e32 v51, v19
+; GFX90a-NEXT:    v_mov_b32_e32 v52, v20
+; GFX90a-NEXT:    v_mov_b32_e32 v53, v21
+; GFX90a-NEXT:    v_mov_b32_e32 v54, v22
+; GFX90a-NEXT:    v_mov_b32_e32 v55, v23
+; GFX90a-NEXT:    v_mov_b32_e32 v56, v24
+; GFX90a-NEXT:    v_mov_b32_e32 v57, v25
+; GFX90a-NEXT:    v_mov_b32_e32 v58, v26
+; GFX90a-NEXT:    v_mov_b32_e32 v59, v27
+; GFX90a-NEXT:    v_mov_b32_e32 v60, v28
+; GFX90a-NEXT:    v_mov_b32_e32 v61, v29
+; GFX90a-NEXT:    v_mov_b32_e32 v62, v30
+; GFX90a-NEXT:    ; kill: def $vgpr63 killed $vgpr0 killed $exec
+; GFX90a-NEXT:    ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+; GFX90a-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90a-NEXT:    v_readlane_b32 s67, v31, 31
+; GFX90a-NEXT:    v_readlane_b32 s66, v31, 30
+; GFX90a-NEXT:    v_readlane_b32 s65, v31, 29
+; GFX90a-NEXT:    v_readlane_b32 s64, v31, 28
+; GFX90a-NEXT:    v_readlane_b32 s63, v31, 27
+; GFX90a-NEXT:    v_readlane_b32 s62, v31, 26
+; GFX90a-NEXT:    v_readlane_b32 s61, v31, 25
+; GFX90a-NEXT:    v_readlane_b32 s60, v31, 24
+; GFX90a-NEXT:    v_readlane_b32 s59, v31, 23
+; GFX90a-NEXT:    v_readlane_b32 s58, v31, 22
+; GFX90a-NEXT:    v_readlane_b32 s57, v31, 21
+; GFX90a-NEXT:    v_readlane_b32 s56, v31, 20
+; GFX90a-NEXT:    v_readlane_b32 s55, v31, 19
+; GFX90a-NEXT:    v_readlane_b32 s54, v31, 18
+; GFX90a-NEXT:    v_readlane_b32 s53, v31, 17
+; GFX90a-NEXT:    v_readlane_b32 s52, v31, 16
+; GFX90a-NEXT:    v_readlane_b32 s51, v31, 15
+; GFX90a-NEXT:    v_readlane_b32 s50, v31, 14
+; GFX90a-NEXT:    v_readlane_b32 s49, v31, 13
+; GFX90a-NEXT:    v_readlane_b32 s48, v31, 12
+; GFX90a-NEXT:    v_readlane_b32 s47, v31, 11
+; GFX90a-NEXT:    v_readlane_b32 s46, v31, 10
+; GFX90a-NEXT:    v_readlane_b32 s45, v31, 9
+; GFX90a-NEXT:    v_readlane_b32 s44, v31, 8
+; GFX90a-NEXT:    v_readlane_b32 s43, v31, 7
+; GFX90a-NEXT:    v_readlane_b32 s42, v31, 6
+; GFX90a-NEXT:    v_readlane_b32 s41, v31, 5
+; GFX90a-NEXT:    v_readlane_b32 s40, v31, 4
+; GFX90a-NEXT:    v_readlane_b32 s39, v31, 3
+; GFX90a-NEXT:    v_readlane_b32 s38, v31, 2
+; GFX90a-NEXT:    v_readlane_b32 s37, v31, 1
+; GFX90a-NEXT:    v_readlane_b32 s36, v31, 0
+; GFX90a-NEXT:    v_accvgpr_read_b32 v63, a15 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v62, a14 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v61, a13 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v60, a12 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v59, a11 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v58, a10 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v57, a9 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v56, a8 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v47, a7 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v46, a6 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v45, a5 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v44, a4 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v43, a3 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v42, a2 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v41, a1 ; Reload Reuse
+; GFX90a-NEXT:    v_accvgpr_read_b32 v40, a0 ; Reload Reuse
+; GFX90a-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX90a-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GFX90a-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX90a-NEXT:    s_waitcnt vmcnt(0)
+; GFX90a-NEXT:    s_setpc_b64 s[30:31]
+  %2 = shufflevector <16 x i64> %0, <16 x i64> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret i32 0
+}


        


More information about the llvm-commits mailing list