[llvm] 75cf046 - Re-apply "[AMDGPU] Fix test failures when expensive checks are enabled (#130644)"
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 14 01:50:08 PDT 2025
Author: Shilei Tian
Date: 2025-03-14T09:49:20+01:00
New Revision: 75cf046f0306c41c711a5dc21755f6c723ef39ac
URL: https://github.com/llvm/llvm-project/commit/75cf046f0306c41c711a5dc21755f6c723ef39ac
DIFF: https://github.com/llvm/llvm-project/commit/75cf046f0306c41c711a5dc21755f6c723ef39ac.diff
LOG: Re-apply "[AMDGPU] Fix test failures when expensive checks are enabled (#130644)"
As suggested in the commit comment at
https://github.com/llvm/llvm-project/commit/5ec884e5d8a17f5764b09b66f28248b1dc403d4b#commitcomment-153707488
this change seems to fix the failures of the following tests when building with -DLLVM_ENABLE_EXPENSIVE_CHECKS=ON:
LLVM :: CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
LLVM :: CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
LLVM :: CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
> This PR fixes test failures introduced in #127353 when expensive checks
> are enabled.
>
> For `llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll` and
> `llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll`, `s59`
> is no longer in live-ins because it is caller saved. Switch to `s55` in
> this PR.
Added:
Modified:
llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
index 4ca00f2daf97a..4b5a7c207055a 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
@@ -12,7 +12,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_1-NEXT: ;;#ASMSTART
@@ -20,16 +26,28 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
-; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0
+; GFX10_1-NEXT: v_readfirstlane_b32 s55, v0
; GFX10_1-NEXT: ;;#ASMSTART
-; GFX10_1-NEXT: ; use s59, scc
+; GFX10_1-NEXT: ; use s55, scc
; GFX10_1-NEXT: ;;#ASMEND
+; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_3-NEXT: ;;#ASMSTART
@@ -37,17 +55,27 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
-; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0
+; GFX10_3-NEXT: v_readfirstlane_b32 s55, v0
; GFX10_3-NEXT: ;;#ASMSTART
-; GFX10_3-NEXT: ; use s59, scc
+; GFX10_3-NEXT: ; use s55, scc
; GFX10_3-NEXT: ;;#ASMEND
+; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
+; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_add_i32 s0, s32, 64
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_writelane_b32 v1, s55, 0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
; GFX11-NEXT: s_addc_u32 s0, s32, 0x4040
@@ -57,10 +85,16 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX11-NEXT: s_bitcmp1_b32 s0, 0
; GFX11-NEXT: s_bitset0_b32 s0, 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_mov_b32 s59, s0
+; GFX11-NEXT: s_mov_b32 s55, s0
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s59, scc
+; GFX11-NEXT: ; use s55, scc
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: v_readlane_b32 s55, v1, 0
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
+; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
@@ -70,7 +104,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX12-NEXT: v_writelane_b32 v1, s55, 0
; GFX12-NEXT: s_add_co_ci_u32 s0, s32, 0x4000
; GFX12-NEXT: v_mov_b32_e32 v0, s32
; GFX12-NEXT: s_wait_alu 0xfffe
@@ -80,34 +120,54 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX12-NEXT: ; use alloca0 v0
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s59, s0
+; GFX12-NEXT: s_mov_b32 s55, s0
; GFX12-NEXT: ;;#ASMSTART
-; GFX12-NEXT: ; use s59, scc
+; GFX12-NEXT: ; use s55, scc
; GFX12-NEXT: ;;#ASMEND
+; GFX12-NEXT: v_readlane_b32 s55, v1, 0
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-NEXT: v_writelane_b32 v1, s55, 0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
-; GFX8-NEXT: s_movk_i32 s59, 0x4040
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0
+; GFX8-NEXT: s_movk_i32 s55, 0x4040
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s55, v0
+; GFX8-NEXT: v_readfirstlane_b32 s55, v0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s59, scc
+; GFX8-NEXT: ; use s55, scc
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_readlane_b32 s55, v1, 0
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: ;;#ASMSTART
@@ -115,34 +175,52 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0
+; GFX900-NEXT: v_writelane_b32 v1, s55, 0
+; GFX900-NEXT: v_readfirstlane_b32 s55, v0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX900-NEXT: v_readfirstlane_b32 s59, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s59, scc
+; GFX900-NEXT: ; use s55, scc
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_readlane_b32 s55, v1, 0
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
+; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_add_i32 s0, s32, 64
; GFX942-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-NEXT: s_addc_u32 s0, s32, 0x4040
; GFX942-NEXT: s_bitcmp1_b32 s0, 0
; GFX942-NEXT: s_bitset0_b32 s0, 0
+; GFX942-NEXT: v_writelane_b32 v1, s55, 0
+; GFX942-NEXT: s_mov_b32 s55, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s59, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s59, scc
+; GFX942-NEXT: ; use s55, scc
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s55, v1, 0
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
+; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
- call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1, i32 0)
+ call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1, i32 0)
ret void
}
@@ -152,36 +230,65 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
-; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5
-; GFX10_1-NEXT: s_addk_i32 s59, 0x4040
+; GFX10_1-NEXT: s_lshr_b32 s55, s32, 5
+; GFX10_1-NEXT: s_addk_i32 s55, 0x4040
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use alloca0 v0
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: ;;#ASMSTART
-; GFX10_1-NEXT: ; use s59
+; GFX10_1-NEXT: ; use s55
; GFX10_1-NEXT: ;;#ASMEND
+; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
-; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5
-; GFX10_3-NEXT: s_addk_i32 s59, 0x4040
+; GFX10_3-NEXT: s_lshr_b32 s55, s32, 5
+; GFX10_3-NEXT: s_addk_i32 s55, 0x4040
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use alloca0 v0
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: ;;#ASMSTART
-; GFX10_3-NEXT: ; use s59
+; GFX10_3-NEXT: ; use s55
; GFX10_3-NEXT: ;;#ASMEND
+; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
+; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: v_writelane_b32 v1, s55, 0
; GFX11-NEXT: s_add_i32 s0, s32, 64
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
@@ -189,10 +296,16 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use alloca0 v0
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_mov_b32 s59, s0
+; GFX11-NEXT: s_mov_b32 s55, s0
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s59
+; GFX11-NEXT: ; use s55
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: v_readlane_b32 s55, v1, 0
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
+; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_dead_scc:
@@ -202,67 +315,110 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-NEXT: v_writelane_b32 v1, s55, 0
; GFX12-NEXT: s_add_co_i32 s0, s32, 0x4000
; GFX12-NEXT: v_mov_b32_e32 v0, s32
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 s55, s0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use alloca0 v0
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s59, s0
; GFX12-NEXT: ;;#ASMSTART
-; GFX12-NEXT: ; use s59
+; GFX12-NEXT: ; use s55
; GFX12-NEXT: ;;#ASMEND
+; GFX12-NEXT: v_readlane_b32 s55, v1, 0
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: v_writelane_b32 v1, s55, 0
+; GFX8-NEXT: s_lshr_b32 s55, s32, 6
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
-; GFX8-NEXT: s_lshr_b32 s59, s32, 6
+; GFX8-NEXT: s_addk_i32 s55, 0x4040
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: s_addk_i32 s59, 0x4040
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s59
+; GFX8-NEXT: ; use s55
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_readlane_b32 s55, v1, 0
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: v_writelane_b32 v1, s55, 0
+; GFX900-NEXT: s_lshr_b32 s55, s32, 6
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
-; GFX900-NEXT: s_lshr_b32 s59, s32, 6
+; GFX900-NEXT: s_addk_i32 s55, 0x4040
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_addk_i32 s59, 0x4040
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s59
+; GFX900-NEXT: ; use s55
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_readlane_b32 s55, v1, 0
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
+; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_add_i32 s0, s32, 64
; GFX942-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NEXT: s_add_i32 s0, s32, 0x4040
+; GFX942-NEXT: v_writelane_b32 v1, s55, 0
+; GFX942-NEXT: s_mov_b32 s55, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s59, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s59
+; GFX942-NEXT: ; use s55
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s55, v1, 0
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
+; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
- call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca1)
+ call void asm sideeffect "; use $0", "{s55}"(ptr addrspace(5) %alloca1)
ret void
}
@@ -272,8 +428,14 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_1-NEXT: s_mov_b32 s5, s33
; GFX10_1-NEXT: s_mov_b32 s33, s32
-; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880
+; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33
+; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0
+; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_1-NEXT: s_mov_b32 s32, s33
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
@@ -281,12 +443,19 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX10_1-NEXT: ; use alloca0 v0
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33
-; GFX10_1-NEXT: s_mov_b32 s33, s5
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
-; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0
+; GFX10_1-NEXT: v_readfirstlane_b32 s55, v0
; GFX10_1-NEXT: ;;#ASMSTART
-; GFX10_1-NEXT: ; use s59, scc
+; GFX10_1-NEXT: ; use s55, scc
; GFX10_1-NEXT: ;;#ASMEND
+; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880
+; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: s_mov_b32 s33, s5
+; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
@@ -294,8 +463,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_3-NEXT: s_mov_b32 s5, s33
; GFX10_3-NEXT: s_mov_b32 s33, s32
-; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880
+; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33
+; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0
+; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_3-NEXT: s_mov_b32 s32, s33
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
@@ -303,12 +477,18 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX10_3-NEXT: ; use alloca0 v0
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33
-; GFX10_3-NEXT: s_mov_b32 s33, s5
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
-; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0
+; GFX10_3-NEXT: v_readfirstlane_b32 s55, v0
; GFX10_3-NEXT: ;;#ASMSTART
-; GFX10_3-NEXT: ; use s59, scc
+; GFX10_3-NEXT: ; use s55, scc
; GFX10_3-NEXT: ;;#ASMEND
+; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880
+; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: s_mov_b32 s33, s5
+; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
@@ -316,9 +496,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s1, s33
; GFX11-NEXT: s_mov_b32 s33, s32
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s2, s33, 0x4044
+; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_addk_i32 s32, 0x4080
; GFX11-NEXT: s_add_i32 s0, s33, 64
-; GFX11-NEXT: s_mov_b32 s32, s33
+; GFX11-NEXT: v_writelane_b32 v1, s55, 0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
; GFX11-NEXT: s_addc_u32 s0, s33, 0x4040
@@ -327,11 +511,18 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_bitcmp1_b32 s0, 0
; GFX11-NEXT: s_bitset0_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s33, s1
-; GFX11-NEXT: s_mov_b32 s59, s0
+; GFX11-NEXT: s_mov_b32 s32, s33
+; GFX11-NEXT: s_mov_b32 s55, s0
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s59, scc
+; GFX11-NEXT: ; use s55, scc
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: v_readlane_b32 s55, v1, 0
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s2, s33, 0x4044
+; GFX11-NEXT: scratch_load_b32 v1, off, s2 ; 4-byte Folded Reload
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: s_mov_b32 s33, s1
+; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
@@ -343,9 +534,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s1, s33
; GFX12-NEXT: s_mov_b32 s33, s32
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_store_b32 off, v1, s33 offset:16388 ; 4-byte Folded Spill
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_addk_co_i32 s32, 0x4040
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
-; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_writelane_b32 v1, s55, 0
; GFX12-NEXT: s_add_co_ci_u32 s0, s33, 0x4000
; GFX12-NEXT: v_mov_b32_e32 v0, s33
; GFX12-NEXT: s_wait_alu 0xfffe
@@ -355,12 +550,18 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX12-NEXT: ; use alloca0 v0
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s59, s0
+; GFX12-NEXT: s_mov_b32 s55, s0
; GFX12-NEXT: ;;#ASMSTART
-; GFX12-NEXT: ; use s59, scc
+; GFX12-NEXT: ; use s55, scc
; GFX12-NEXT: ;;#ASMEND
+; GFX12-NEXT: v_readlane_b32 s55, v1, 0
; GFX12-NEXT: s_mov_b32 s32, s33
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_load_b32 v1, off, s33 offset:16388 ; 4-byte Folded Reload
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_mov_b32 s33, s1
+; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
@@ -369,22 +570,33 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s6, s33
; GFX8-NEXT: s_mov_b32 s33, s32
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s7, s33, 0x101100
+; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-NEXT: v_writelane_b32 v1, s55, 0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33
-; GFX8-NEXT: s_movk_i32 s59, 0x4040
+; GFX8-NEXT: s_movk_i32 s55, 0x4040
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s55, v0
; GFX8-NEXT: s_add_i32 s32, s32, 0x102000
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0
+; GFX8-NEXT: v_readfirstlane_b32 s55, v0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s59, scc
+; GFX8-NEXT: ; use s55, scc
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_readlane_b32 s55, v1, 0
; GFX8-NEXT: s_mov_b32 s32, s33
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s7, s33, 0x101100
+; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_mov_b32 s33, s6
+; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
@@ -392,21 +604,32 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s6, s33
; GFX900-NEXT: s_mov_b32 s33, s32
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s7, s33, 0x101100
+; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33
-; GFX900-NEXT: s_add_i32 s32, s32, 0x102000
; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0
+; GFX900-NEXT: s_add_i32 s32, s32, 0x102000
+; GFX900-NEXT: v_writelane_b32 v1, s55, 0
+; GFX900-NEXT: v_readfirstlane_b32 s55, v0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX900-NEXT: v_readfirstlane_b32 s59, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s59, scc
+; GFX900-NEXT: ; use s55, scc
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_readlane_b32 s55, v1, 0
; GFX900-NEXT: s_mov_b32 s32, s33
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s7, s33, 0x101100
+; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_mov_b32 s33, s6
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
@@ -414,6 +637,10 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, s33
; GFX942-NEXT: s_mov_b32 s33, s32
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s3, s33, 0x4044
+; GFX942-NEXT: scratch_store_dword off, v1, s3 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_addk_i32 s32, 0x4080
; GFX942-NEXT: s_add_i32 s0, s33, 64
; GFX942-NEXT: v_mov_b32_e32 v0, s0
@@ -421,20 +648,27 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX942-NEXT: s_addc_u32 s0, s33, 0x4040
; GFX942-NEXT: s_bitcmp1_b32 s0, 0
; GFX942-NEXT: s_bitset0_b32 s0, 0
+; GFX942-NEXT: v_writelane_b32 v1, s55, 0
+; GFX942-NEXT: s_mov_b32 s55, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s59, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s59, scc
+; GFX942-NEXT: ; use s55, scc
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s55, v1, 0
; GFX942-NEXT: s_mov_b32 s32, s33
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s3, s33, 0x4044
+; GFX942-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_mov_b32 s33, s2
+; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
- call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1, i32 0)
+ call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1, i32 0)
ret void
}
@@ -442,39 +676,75 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
+; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32
+; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
-; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
-; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0
+; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1
+; GFX10_1-NEXT: v_readfirstlane_b32 s55, v1
; GFX10_1-NEXT: ;;#ASMSTART
-; GFX10_1-NEXT: ; use s59, scc
+; GFX10_1-NEXT: ; use s55, scc
; GFX10_1-NEXT: ;;#ASMEND
+; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
+; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800
+; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32
+; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
-; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
-; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0
+; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1
+; GFX10_3-NEXT: v_readfirstlane_b32 s55, v1
; GFX10_3-NEXT: ;;#ASMSTART
-; GFX10_3-NEXT: ; use s59, scc
+; GFX10_3-NEXT: ; use s55, scc
; GFX10_3-NEXT: ;;#ASMEND
+; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800
+; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s1, s32, 0x4040
+; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX11-NEXT: v_writelane_b32 v0, s55, 0
; GFX11-NEXT: s_addc_u32 s0, s32, 64
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_bitcmp1_b32 s0, 0
; GFX11-NEXT: s_bitset0_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s59, s0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: s_mov_b32 s55, s0
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s59, scc
+; GFX11-NEXT: ; use s55, scc
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: v_readlane_b32 s55, v0, 0
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s1, s32, 0x4040
+; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
@@ -484,53 +754,97 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-NEXT: v_writelane_b32 v0, s55, 0
+; GFX12-NEXT: s_mov_b32 s55, s32
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
-; GFX12-NEXT: s_mov_b32 s59, s32
; GFX12-NEXT: ;;#ASMSTART
-; GFX12-NEXT: ; use s59, scc
+; GFX12-NEXT: ; use s55, scc
; GFX12-NEXT: ;;#ASMEND
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_readlane_b32 s55, v0, 0
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
-; GFX8-NEXT: s_mov_b32 s59, 64
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s6, s32, 0x101000
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: v_writelane_b32 v0, s55, 0
+; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s32
+; GFX8-NEXT: s_mov_b32 s55, 64
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, s55, v1
+; GFX8-NEXT: v_readfirstlane_b32 s55, v1
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s59, scc
+; GFX8-NEXT: ; use s55, scc
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_readlane_b32 s55, v0, 0
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s6, s32, 0x101000
+; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
-; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s6, s32, 0x101000
+; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s32
+; GFX900-NEXT: v_add_u32_e32 v1, 64, v1
+; GFX900-NEXT: v_writelane_b32 v0, s55, 0
+; GFX900-NEXT: v_readfirstlane_b32 s55, v1
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX900-NEXT: v_readfirstlane_b32 s59, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s59, scc
+; GFX900-NEXT: ; use s55, scc
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_readlane_b32 s55, v0, 0
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s6, s32, 0x101000
+; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s2, s32, 0x4040
+; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-NEXT: s_addc_u32 s0, s32, 64
; GFX942-NEXT: s_bitcmp1_b32 s0, 0
; GFX942-NEXT: s_bitset0_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s59, s0
+; GFX942-NEXT: v_writelane_b32 v0, s55, 0
+; GFX942-NEXT: s_mov_b32 s55, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s59, scc
+; GFX942-NEXT: ; use s55, scc
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s55, v0, 0
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s2, s32, 0x4040
+; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
- call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0)
+ call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca0, i32 0)
ret void
}
@@ -538,32 +852,67 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5
-; GFX10_1-NEXT: s_add_i32 s59, s59, 64
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
+; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0
+; GFX10_1-NEXT: s_lshr_b32 s55, s32, 5
+; GFX10_1-NEXT: s_add_i32 s55, s55, 64
; GFX10_1-NEXT: ;;#ASMSTART
-; GFX10_1-NEXT: ; use s59
+; GFX10_1-NEXT: ; use s55
; GFX10_1-NEXT: ;;#ASMEND
+; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
+; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5
-; GFX10_3-NEXT: s_add_i32 s59, s59, 64
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800
+; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0
+; GFX10_3-NEXT: s_lshr_b32 s55, s32, 5
+; GFX10_3-NEXT: s_add_i32 s55, s55, 64
; GFX10_3-NEXT: ;;#ASMSTART
-; GFX10_3-NEXT: ; use s59
+; GFX10_3-NEXT: ; use s55
; GFX10_3-NEXT: ;;#ASMEND
+; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800
+; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s1, s32, 0x4040
+; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: v_writelane_b32 v0, s55, 0
; GFX11-NEXT: s_add_i32 s0, s32, 64
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_mov_b32 s59, s0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: s_mov_b32 s55, s0
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s59
+; GFX11-NEXT: ; use s55
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: v_readlane_b32 s55, v0, 0
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s1, s32, 0x4040
+; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
@@ -573,44 +922,88 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_mov_b32 s59, s32
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-NEXT: v_writelane_b32 v0, s55, 0
+; GFX12-NEXT: s_mov_b32 s55, s32
; GFX12-NEXT: ;;#ASMSTART
-; GFX12-NEXT: ; use s59
+; GFX12-NEXT: ; use s55
; GFX12-NEXT: ;;#ASMEND
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_readlane_b32 s55, v0, 0
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_lshr_b32 s59, s32, 6
-; GFX8-NEXT: s_add_i32 s59, s59, 64
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s6, s32, 0x101000
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: v_writelane_b32 v0, s55, 0
+; GFX8-NEXT: s_lshr_b32 s55, s32, 6
+; GFX8-NEXT: s_add_i32 s55, s55, 64
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s59
+; GFX8-NEXT: ; use s55
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_readlane_b32 s55, v0, 0
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s6, s32, 0x101000
+; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_lshr_b32 s59, s32, 6
-; GFX900-NEXT: s_add_i32 s59, s59, 64
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s6, s32, 0x101000
+; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: v_writelane_b32 v0, s55, 0
+; GFX900-NEXT: s_lshr_b32 s55, s32, 6
+; GFX900-NEXT: s_add_i32 s55, s55, 64
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s59
+; GFX900-NEXT: ; use s55
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_readlane_b32 s55, v0, 0
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s6, s32, 0x101000
+; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s2, s32, 0x4040
+; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_add_i32 s0, s32, 64
-; GFX942-NEXT: s_mov_b32 s59, s0
+; GFX942-NEXT: v_writelane_b32 v0, s55, 0
+; GFX942-NEXT: s_mov_b32 s55, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s59
+; GFX942-NEXT: ; use s55
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s55, v0, 0
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s2, s32, 0x4040
+; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
- call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0)
+ call void asm sideeffect "; use $0", "{s55}"(ptr addrspace(5) %alloca0)
ret void
}
@@ -620,16 +1013,29 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_1-NEXT: s_mov_b32 s5, s33
; GFX10_1-NEXT: s_mov_b32 s33, s32
-; GFX10_1-NEXT: s_add_i32 s32, s32, 0x80800
-; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800
+; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s33
+; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0
+; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_1-NEXT: s_mov_b32 s32, s33
-; GFX10_1-NEXT: s_mov_b32 s33, s5
-; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
-; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0
+; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1
+; GFX10_1-NEXT: v_readfirstlane_b32 s55, v1
; GFX10_1-NEXT: ;;#ASMSTART
-; GFX10_1-NEXT: ; use s59, scc
+; GFX10_1-NEXT: ; use s55, scc
; GFX10_1-NEXT: ;;#ASMEND
+; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800
+; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: s_mov_b32 s33, s5
+; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp:
@@ -637,16 +1043,27 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_3-NEXT: s_mov_b32 s5, s33
; GFX10_3-NEXT: s_mov_b32 s33, s32
-; GFX10_3-NEXT: s_add_i32 s32, s32, 0x80800
-; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800
+; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s33
+; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0
+; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_3-NEXT: s_mov_b32 s32, s33
-; GFX10_3-NEXT: s_mov_b32 s33, s5
-; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
-; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0
+; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1
+; GFX10_3-NEXT: v_readfirstlane_b32 s55, v1
; GFX10_3-NEXT: ;;#ASMSTART
-; GFX10_3-NEXT: ; use s59, scc
+; GFX10_3-NEXT: ; use s55, scc
; GFX10_3-NEXT: ;;#ASMEND
+; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800
+; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: s_mov_b32 s33, s5
+; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp:
@@ -654,17 +1071,29 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s1, s33
; GFX11-NEXT: s_mov_b32 s33, s32
-; GFX11-NEXT: s_addk_i32 s32, 0x4040
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s2, s33, 0x4040
+; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: s_addk_i32 s32, 0x4080
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX11-NEXT: v_writelane_b32 v0, s55, 0
; GFX11-NEXT: s_addc_u32 s0, s33, 64
; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: s_bitcmp1_b32 s0, 0
; GFX11-NEXT: s_bitset0_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s33, s1
-; GFX11-NEXT: s_mov_b32 s59, s0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: s_mov_b32 s55, s0
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s59, scc
+; GFX11-NEXT: ; use s55, scc
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: v_readlane_b32 s55, v0, 0
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s2, s33, 0x4040
+; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: s_mov_b32 s33, s1
+; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp:
@@ -676,15 +1105,25 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s1, s33
; GFX12-NEXT: s_mov_b32 s33, s32
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-NEXT: v_writelane_b32 v0, s55, 0
; GFX12-NEXT: s_addk_co_i32 s32, 0x4040
+; GFX12-NEXT: s_mov_b32 s55, s33
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s59, s33
; GFX12-NEXT: ;;#ASMSTART
-; GFX12-NEXT: ; use s59, scc
+; GFX12-NEXT: ; use s55, scc
; GFX12-NEXT: ;;#ASMEND
+; GFX12-NEXT: v_readlane_b32 s55, v0, 0
; GFX12-NEXT: s_mov_b32 s32, s33
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_mov_b32 s33, s1
+; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
@@ -693,17 +1132,28 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s6, s33
; GFX8-NEXT: s_mov_b32 s33, s32
-; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33
-; GFX8-NEXT: s_mov_b32 s59, 64
-; GFX8-NEXT: s_add_i32 s32, s32, 0x101000
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s7, s33, 0x101000
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: v_writelane_b32 v0, s55, 0
+; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s33
+; GFX8-NEXT: s_mov_b32 s55, 64
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, s55, v1
+; GFX8-NEXT: s_add_i32 s32, s32, 0x102000
+; GFX8-NEXT: v_readfirstlane_b32 s55, v1
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s59, scc
+; GFX8-NEXT: ; use s55, scc
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_readlane_b32 s55, v0, 0
; GFX8-NEXT: s_mov_b32 s32, s33
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s7, s33, 0x101000
+; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_mov_b32 s33, s6
+; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp:
@@ -711,16 +1161,27 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s6, s33
; GFX900-NEXT: s_mov_b32 s33, s32
-; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33
-; GFX900-NEXT: s_add_i32 s32, s32, 0x101000
-; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s7, s33, 0x101000
+; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s33
+; GFX900-NEXT: v_add_u32_e32 v1, 64, v1
+; GFX900-NEXT: s_add_i32 s32, s32, 0x102000
+; GFX900-NEXT: v_writelane_b32 v0, s55, 0
+; GFX900-NEXT: v_readfirstlane_b32 s55, v1
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX900-NEXT: v_readfirstlane_b32 s59, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s59, scc
+; GFX900-NEXT: ; use s55, scc
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_readlane_b32 s55, v0, 0
; GFX900-NEXT: s_mov_b32 s32, s33
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s7, s33, 0x101000
+; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_mov_b32 s33, s6
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp:
@@ -728,20 +1189,31 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, s33
; GFX942-NEXT: s_mov_b32 s33, s32
-; GFX942-NEXT: s_addk_i32 s32, 0x4040
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s3, s33, 0x4040
+; GFX942-NEXT: scratch_store_dword off, v0, s3 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: s_addk_i32 s32, 0x4080
; GFX942-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-NEXT: s_addc_u32 s0, s33, 64
; GFX942-NEXT: s_bitcmp1_b32 s0, 0
; GFX942-NEXT: s_bitset0_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s59, s0
+; GFX942-NEXT: v_writelane_b32 v0, s55, 0
+; GFX942-NEXT: s_mov_b32 s55, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s59, scc
+; GFX942-NEXT: ; use s55, scc
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s55, v0, 0
; GFX942-NEXT: s_mov_b32 s32, s33
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s3, s33, 0x4040
+; GFX942-NEXT: scratch_load_dword v0, off, s3 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_mov_b32 s33, s2
+; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
- call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0)
+ call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca0, i32 0)
ret void
}
@@ -751,14 +1223,27 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_1-NEXT: s_mov_b32 s4, s33
; GFX10_1-NEXT: s_mov_b32 s33, s32
-; GFX10_1-NEXT: s_add_i32 s32, s32, 0x80800
-; GFX10_1-NEXT: s_lshr_b32 s59, s33, 5
+; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1
+; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800
+; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s5
+; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0
+; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000
+; GFX10_1-NEXT: s_lshr_b32 s55, s33, 5
; GFX10_1-NEXT: s_mov_b32 s32, s33
-; GFX10_1-NEXT: s_add_i32 s59, s59, 64
+; GFX10_1-NEXT: s_add_i32 s55, s55, 64
; GFX10_1-NEXT: ;;#ASMSTART
-; GFX10_1-NEXT: ; use s59
+; GFX10_1-NEXT: ; use s55
; GFX10_1-NEXT: ;;#ASMEND
+; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0
+; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1
+; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800
+; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s5
; GFX10_1-NEXT: s_mov_b32 s33, s4
+; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp:
@@ -766,14 +1251,25 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_3-NEXT: s_mov_b32 s4, s33
; GFX10_3-NEXT: s_mov_b32 s33, s32
-; GFX10_3-NEXT: s_add_i32 s32, s32, 0x80800
-; GFX10_3-NEXT: s_lshr_b32 s59, s33, 5
+; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1
+; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800
+; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s5
+; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0
+; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000
+; GFX10_3-NEXT: s_lshr_b32 s55, s33, 5
; GFX10_3-NEXT: s_mov_b32 s32, s33
-; GFX10_3-NEXT: s_add_i32 s59, s59, 64
+; GFX10_3-NEXT: s_add_i32 s55, s55, 64
; GFX10_3-NEXT: ;;#ASMSTART
-; GFX10_3-NEXT: ; use s59
+; GFX10_3-NEXT: ; use s55
; GFX10_3-NEXT: ;;#ASMEND
+; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0
+; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1
+; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800
+; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s5
; GFX10_3-NEXT: s_mov_b32 s33, s4
+; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp:
@@ -781,14 +1277,25 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
-; GFX11-NEXT: s_addk_i32 s32, 0x4040
+; GFX11-NEXT: s_xor_saveexec_b32 s1, -1
+; GFX11-NEXT: s_add_i32 s2, s33, 0x4040
+; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill
+; GFX11-NEXT: s_mov_b32 exec_lo, s1
+; GFX11-NEXT: v_writelane_b32 v0, s55, 0
+; GFX11-NEXT: s_addk_i32 s32, 0x4080
; GFX11-NEXT: s_add_i32 s1, s33, 64
; GFX11-NEXT: s_mov_b32 s32, s33
-; GFX11-NEXT: s_mov_b32 s59, s1
+; GFX11-NEXT: s_mov_b32 s55, s1
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s59
+; GFX11-NEXT: ; use s55
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: v_readlane_b32 s55, v0, 0
+; GFX11-NEXT: s_xor_saveexec_b32 s1, -1
+; GFX11-NEXT: s_add_i32 s2, s33, 0x4040
+; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload
+; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_mov_b32 s33, s0
+; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp:
@@ -800,14 +1307,24 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s0, s33
; GFX12-NEXT: s_mov_b32 s33, s32
-; GFX12-NEXT: s_addk_co_i32 s32, 0x4040
+; GFX12-NEXT: s_xor_saveexec_b32 s1, -1
+; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s59, s33
+; GFX12-NEXT: s_mov_b32 exec_lo, s1
+; GFX12-NEXT: v_writelane_b32 v0, s55, 0
+; GFX12-NEXT: s_addk_co_i32 s32, 0x4040
+; GFX12-NEXT: s_mov_b32 s55, s33
; GFX12-NEXT: ;;#ASMSTART
-; GFX12-NEXT: ; use s59
+; GFX12-NEXT: ; use s55
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_mov_b32 s32, s33
+; GFX12-NEXT: v_readlane_b32 s55, v0, 0
+; GFX12-NEXT: s_xor_saveexec_b32 s1, -1
+; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s1
; GFX12-NEXT: s_mov_b32 s33, s0
+; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
@@ -816,14 +1333,25 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, s33
; GFX8-NEXT: s_mov_b32 s33, s32
-; GFX8-NEXT: s_add_i32 s32, s32, 0x101000
-; GFX8-NEXT: s_lshr_b32 s59, s33, 6
-; GFX8-NEXT: s_add_i32 s59, s59, 64
+; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1
+; GFX8-NEXT: s_add_i32 s5, s33, 0x101000
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[6:7]
+; GFX8-NEXT: s_add_i32 s32, s32, 0x102000
+; GFX8-NEXT: v_writelane_b32 v0, s55, 0
+; GFX8-NEXT: s_lshr_b32 s55, s33, 6
+; GFX8-NEXT: s_add_i32 s55, s55, 64
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s59
+; GFX8-NEXT: ; use s55
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_readlane_b32 s55, v0, 0
; GFX8-NEXT: s_mov_b32 s32, s33
+; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1
+; GFX8-NEXT: s_add_i32 s5, s33, 0x101000
+; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[6:7]
; GFX8-NEXT: s_mov_b32 s33, s4
+; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp:
@@ -831,14 +1359,25 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, s33
; GFX900-NEXT: s_mov_b32 s33, s32
-; GFX900-NEXT: s_add_i32 s32, s32, 0x101000
-; GFX900-NEXT: s_lshr_b32 s59, s33, 6
-; GFX900-NEXT: s_add_i32 s59, s59, 64
+; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1
+; GFX900-NEXT: s_add_i32 s5, s33, 0x101000
+; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[6:7]
+; GFX900-NEXT: s_add_i32 s32, s32, 0x102000
+; GFX900-NEXT: v_writelane_b32 v0, s55, 0
+; GFX900-NEXT: s_lshr_b32 s55, s33, 6
+; GFX900-NEXT: s_add_i32 s55, s55, 64
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s59
+; GFX900-NEXT: ; use s55
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_readlane_b32 s55, v0, 0
; GFX900-NEXT: s_mov_b32 s32, s33
+; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1
+; GFX900-NEXT: s_add_i32 s5, s33, 0x101000
+; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[6:7]
; GFX900-NEXT: s_mov_b32 s33, s4
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp:
@@ -846,17 +1385,28 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s0, s33
; GFX942-NEXT: s_mov_b32 s33, s32
-; GFX942-NEXT: s_addk_i32 s32, 0x4040
+; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1
+; GFX942-NEXT: s_add_i32 s1, s33, 0x4040
+; GFX942-NEXT: scratch_store_dword off, v0, s1 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[2:3]
+; GFX942-NEXT: s_addk_i32 s32, 0x4080
; GFX942-NEXT: s_add_i32 s1, s33, 64
-; GFX942-NEXT: s_mov_b32 s59, s1
+; GFX942-NEXT: v_writelane_b32 v0, s55, 0
+; GFX942-NEXT: s_mov_b32 s55, s1
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s59
+; GFX942-NEXT: ; use s55
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s55, v0, 0
; GFX942-NEXT: s_mov_b32 s32, s33
+; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1
+; GFX942-NEXT: s_add_i32 s1, s33, 0x4040
+; GFX942-NEXT: scratch_load_dword v0, off, s1 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[2:3]
; GFX942-NEXT: s_mov_b32 s33, s0
+; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
- call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0)
+ call void asm sideeffect "; use $0", "{s55}"(ptr addrspace(5) %alloca0)
ret void
}
@@ -864,48 +1414,83 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5
-; GFX10_1-NEXT: s_add_i32 s59, s4, 0x442c
+; GFX10_1-NEXT: s_add_i32 s55, s4, 0x442c
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use alloca0 v0
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: ;;#ASMSTART
-; GFX10_1-NEXT: ; use s59, scc
+; GFX10_1-NEXT: ; use s55, scc
; GFX10_1-NEXT: ;;#ASMEND
+; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5
-; GFX10_3-NEXT: s_add_i32 s59, s4, 0x442c
+; GFX10_3-NEXT: s_add_i32 s55, s4, 0x442c
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use alloca0 v0
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: ;;#ASMSTART
-; GFX10_3-NEXT: ; use s59, scc
+; GFX10_3-NEXT: ; use s55, scc
; GFX10_3-NEXT: ;;#ASMEND
+; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s1, s32, 0x8040
+; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: v_writelane_b32 v1, s55, 0
; GFX11-NEXT: s_add_i32 s0, s32, 64
-; GFX11-NEXT: s_add_i32 s59, s32, 0x442c
+; GFX11-NEXT: s_add_i32 s55, s32, 0x442c
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use alloca0 v0
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s59, scc
+; GFX11-NEXT: ; use s55, scc
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: v_readlane_b32 s55, v1, 0
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s1, s32, 0x8040
+; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
@@ -915,23 +1500,38 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_add_co_i32 s59, s32, 0x43ec
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-NEXT: v_writelane_b32 v1, s55, 0
+; GFX12-NEXT: s_add_co_i32 s55, s32, 0x43ec
; GFX12-NEXT: v_mov_b32_e32 v0, s32
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use alloca0 v0
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
-; GFX12-NEXT: ; use s59, scc
+; GFX12-NEXT: ; use s55, scc
; GFX12-NEXT: ;;#ASMEND
+; GFX12-NEXT: v_readlane_b32 s55, v1, 0
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_lshr_b32 s4, s32, 6
-; GFX8-NEXT: s_add_i32 s59, s4, 0x442c
+; GFX8-NEXT: v_writelane_b32 v1, s55, 0
+; GFX8-NEXT: s_add_i32 s55, s4, 0x442c
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
; GFX8-NEXT: ;;#ASMSTART
@@ -939,15 +1539,26 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s59, scc
+; GFX8-NEXT: ; use s55, scc
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_readlane_b32 s55, v1, 0
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_lshr_b32 s4, s32, 6
-; GFX900-NEXT: s_add_i32 s59, s4, 0x442c
+; GFX900-NEXT: v_writelane_b32 v1, s55, 0
+; GFX900-NEXT: s_add_i32 s55, s4, 0x442c
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: ;;#ASMSTART
@@ -955,14 +1566,25 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s59, scc
+; GFX900-NEXT: ; use s55, scc
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_readlane_b32 s55, v1, 0
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: s_add_i32 s59, s32, 0x442c
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s2, s32, 0x8040
+; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: v_writelane_b32 v1, s55, 0
+; GFX942-NEXT: s_add_i32 s55, s32, 0x442c
; GFX942-NEXT: s_add_i32 s0, s32, 64
; GFX942-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NEXT: ;;#ASMSTART
@@ -970,14 +1592,20 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s59, scc
+; GFX942-NEXT: ; use s55, scc
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s55, v1, 0
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s2, s32, 0x8040
+; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca [4096 x i32], align 4, addrspace(5)
%alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 251
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
- call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0)
+ call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0)
ret void
}
@@ -985,54 +1613,89 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT: s_lshl_b32 s4, s16, 2
-; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5
-; GFX10_1-NEXT: s_add_i32 s59, s59, s4
+; GFX10_1-NEXT: s_lshr_b32 s55, s32, 5
+; GFX10_1-NEXT: s_add_i32 s55, s55, s4
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
-; GFX10_1-NEXT: s_addk_i32 s59, 0x4040
+; GFX10_1-NEXT: s_addk_i32 s55, 0x4040
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use alloca0 v0
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_1-NEXT: ;;#ASMSTART
-; GFX10_1-NEXT: ; use s59, scc
+; GFX10_1-NEXT: ; use s55, scc
; GFX10_1-NEXT: ;;#ASMEND
+; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0
+; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT: s_lshl_b32 s4, s16, 2
-; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5
-; GFX10_3-NEXT: s_add_i32 s59, s59, s4
+; GFX10_3-NEXT: s_lshr_b32 s55, s32, 5
+; GFX10_3-NEXT: s_add_i32 s55, s55, s4
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
-; GFX10_3-NEXT: s_addk_i32 s59, 0x4040
+; GFX10_3-NEXT: s_addk_i32 s55, 0x4040
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use alloca0 v0
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_3-NEXT: ;;#ASMSTART
-; GFX10_3-NEXT: ; use s59, scc
+; GFX10_3-NEXT: ; use s55, scc
; GFX10_3-NEXT: ;;#ASMEND
+; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0
+; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_xor_saveexec_b32 s1, -1
+; GFX11-NEXT: s_add_i32 s2, s32, 0x8040
+; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill
+; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s1, s32, 64
+; GFX11-NEXT: v_writelane_b32 v1, s55, 0
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: v_mov_b32_e32 v0, s1
-; GFX11-NEXT: s_add_i32 s59, s32, s0
+; GFX11-NEXT: s_add_i32 s55, s32, s0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use alloca0 v0
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_addk_i32 s59, 0x4040
+; GFX11-NEXT: s_addk_i32 s55, 0x4040
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s59, scc
+; GFX11-NEXT: ; use s55, scc
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: v_readlane_b32 s55, v1, 0
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: s_add_i32 s1, s32, 0x8040
+; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
@@ -1042,29 +1705,44 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_xor_saveexec_b32 s1, -1
+; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s1
+; GFX12-NEXT: v_writelane_b32 v1, s55, 0
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-NEXT: v_mov_b32_e32 v0, s32
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_add_co_i32 s59, s32, s0
+; GFX12-NEXT: s_add_co_i32 s55, s32, s0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use alloca0 v0
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_addk_co_i32 s59, 0x4000
+; GFX12-NEXT: s_addk_co_i32 s55, 0x4000
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
; GFX12-NEXT: ;;#ASMSTART
-; GFX12-NEXT: ; use s59, scc
+; GFX12-NEXT: ; use s55, scc
; GFX12-NEXT: ;;#ASMEND
+; GFX12-NEXT: v_readlane_b32 s55, v1, 0
+; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: v_writelane_b32 v1, s55, 0
; GFX8-NEXT: s_lshl_b32 s4, s16, 2
-; GFX8-NEXT: s_lshr_b32 s59, s32, 6
-; GFX8-NEXT: s_add_i32 s59, s59, s4
-; GFX8-NEXT: s_addk_i32 s59, 0x4040
+; GFX8-NEXT: s_lshr_b32 s55, s32, 6
+; GFX8-NEXT: s_add_i32 s55, s55, s4
+; GFX8-NEXT: s_addk_i32 s55, 0x4040
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
; GFX8-NEXT: ;;#ASMSTART
@@ -1072,17 +1750,28 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s59, scc
+; GFX8-NEXT: ; use s55, scc
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_readlane_b32 s55, v1, 0
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: v_writelane_b32 v1, s55, 0
; GFX900-NEXT: s_lshl_b32 s4, s16, 2
-; GFX900-NEXT: s_lshr_b32 s59, s32, 6
-; GFX900-NEXT: s_add_i32 s59, s59, s4
-; GFX900-NEXT: s_addk_i32 s59, 0x4040
+; GFX900-NEXT: s_lshr_b32 s55, s32, 6
+; GFX900-NEXT: s_add_i32 s55, s55, s4
+; GFX900-NEXT: s_addk_i32 s55, 0x4040
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: ;;#ASMSTART
@@ -1090,16 +1779,27 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s59, scc
+; GFX900-NEXT: ; use s55, scc
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_readlane_b32 s55, v1, 0
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1
+; GFX942-NEXT: s_add_i32 s1, s32, 0x8040
+; GFX942-NEXT: scratch_store_dword off, v1, s1 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[2:3]
; GFX942-NEXT: s_lshl_b32 s0, s0, 2
-; GFX942-NEXT: s_add_i32 s59, s32, s0
-; GFX942-NEXT: s_addk_i32 s59, 0x4040
+; GFX942-NEXT: v_writelane_b32 v1, s55, 0
+; GFX942-NEXT: s_add_i32 s55, s32, s0
+; GFX942-NEXT: s_addk_i32 s55, 0x4040
; GFX942-NEXT: s_add_i32 s0, s32, 64
; GFX942-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NEXT: ;;#ASMSTART
@@ -1107,14 +1807,20 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s59, scc
+; GFX942-NEXT: ; use s55, scc
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s55, v1, 0
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s2, s32, 0x8040
+; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca [4096 x i32], align 4, addrspace(5)
%alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 %soffset
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
- call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0)
+ call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
index e8dacc93a8f3c..17581bcb61e99 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
@@ -67,11 +67,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX7-NEXT: v_mov_b32_e32 v0, 0x4040
; GFX7-NEXT: v_mad_u32_u24 v0, v0, 64, s32
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 6, v0
-; GFX7-NEXT: v_readfirstlane_b32 s59, v0
+; GFX7-NEXT: v_readfirstlane_b32 s54, v0
; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ;;#ASMSTART
-; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
+; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: v_readlane_b32 s55, v23, 16
; GFX7-NEXT: v_readlane_b32 s54, v23, 15
@@ -133,12 +133,13 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32
; GFX8-NEXT: v_mov_b32_e32 v0, 0x4040
; GFX8-NEXT: v_mad_u32_u24 v0, v0, 64, s32
+; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 6, v0
-; GFX8-NEXT: v_readfirstlane_b32 s59, v0
+; GFX8-NEXT: v_readfirstlane_b32 s54, v0
; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
+; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v23, 16
; GFX8-NEXT: v_readlane_b32 s54, v23, 15
@@ -199,12 +200,13 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0
-; GFX900-NEXT: v_readfirstlane_b32 s59, v0
+; GFX900-NEXT: v_readfirstlane_b32 s54, v0
; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
+; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v23, 16
; GFX900-NEXT: v_readlane_b32 s54, v23, 15
@@ -263,12 +265,13 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_addc_u32 s60, s32, 0x4040
-; GFX942-NEXT: s_bitcmp1_b32 s60, 0
-; GFX942-NEXT: s_bitset0_b32 s60, 0
-; GFX942-NEXT: s_mov_b32 s59, s60
+; GFX942-NEXT: s_addc_u32 s59, s32, 0x4040
+; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX942-NEXT: s_bitcmp1_b32 s59, 0
+; GFX942-NEXT: s_bitset0_b32 s59, 0
+; GFX942-NEXT: s_mov_b32 s54, s59
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
+; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v23, 16
; GFX942-NEXT: v_readlane_b32 s54, v23, 15
@@ -329,10 +332,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_lshrrev_b32_e64 v24, 5, s32
+; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX10_1-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
-; GFX10_1-NEXT: v_readfirstlane_b32 s59, v24
+; GFX10_1-NEXT: v_readfirstlane_b32 s54, v24
; GFX10_1-NEXT: ;;#ASMSTART
-; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
+; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v23, 16
; GFX10_1-NEXT: v_readlane_b32 s54, v23, 15
@@ -393,10 +397,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_lshrrev_b32_e64 v24, 5, s32
+; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX10_3-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
-; GFX10_3-NEXT: v_readfirstlane_b32 s59, v24
+; GFX10_3-NEXT: v_readfirstlane_b32 s54, v24
; GFX10_3-NEXT: ;;#ASMSTART
-; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
+; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v23, 16
; GFX10_3-NEXT: v_readlane_b32 s54, v23, 15
@@ -456,13 +461,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_addc_u32 s60, s32, 0x4040
+; GFX11-NEXT: s_addc_u32 s59, s32, 0x4040
+; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_bitcmp1_b32 s60, 0
-; GFX11-NEXT: s_bitset0_b32 s60, 0
-; GFX11-NEXT: s_mov_b32 s59, s60
+; GFX11-NEXT: s_bitcmp1_b32 s59, 0
+; GFX11-NEXT: s_bitset0_b32 s59, 0
+; GFX11-NEXT: s_mov_b32 s54, s59
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
+; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v23, 16
; GFX11-NEXT: v_readlane_b32 s54, v23, 15
@@ -524,14 +530,15 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: s_add_co_ci_u32 s60, s32, 0x4000
+; GFX12-NEXT: s_add_co_ci_u32 s59, s32, 0x4000
+; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_bitcmp1_b32 s60, 0
-; GFX12-NEXT: s_bitset0_b32 s60, 0
+; GFX12-NEXT: s_bitcmp1_b32 s59, 0
+; GFX12-NEXT: s_bitset0_b32 s59, 0
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s59, s60
+; GFX12-NEXT: s_mov_b32 s54, s59
; GFX12-NEXT: ;;#ASMSTART
-; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
+; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: v_readlane_b32 s55, v23, 16
; GFX12-NEXT: v_readlane_b32 s54, v23, 15
@@ -579,7 +586,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; scc is unavailable since it is live in
call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10",
- "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s59},{scc}"(
+ "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s54},{scc}"(
<16 x i32> %s0,
<16 x i32> %s1,
<16 x i32> %s2,
@@ -629,9 +636,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: v_mad_u32_u24 v22, 16, 64, s32
; GFX7-NEXT: v_lshrrev_b32_e32 v22, 6, v22
-; GFX7-NEXT: v_readfirstlane_b32 s59, v22
+; GFX7-NEXT: v_readfirstlane_b32 s54, v22
; GFX7-NEXT: ;;#ASMSTART
-; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
+; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: v_readlane_b32 s55, v21, 16
; GFX7-NEXT: v_readlane_b32 s54, v21, 15
@@ -686,10 +693,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_mad_u32_u24 v22, 16, 64, s32
+; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX8-NEXT: v_lshrrev_b32_e32 v22, 6, v22
-; GFX8-NEXT: v_readfirstlane_b32 s59, v22
+; GFX8-NEXT: v_readfirstlane_b32 s54, v22
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
+; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v21, 16
; GFX8-NEXT: v_readlane_b32 s54, v21, 15
@@ -744,10 +752,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_lshrrev_b32_e64 v22, 6, s32
+; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX900-NEXT: v_add_u32_e32 v22, 16, v22
-; GFX900-NEXT: v_readfirstlane_b32 s59, v22
+; GFX900-NEXT: v_readfirstlane_b32 s54, v22
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
+; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v21, 16
; GFX900-NEXT: v_readlane_b32 s54, v21, 15
@@ -801,12 +810,13 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_addc_u32 s60, s32, 16
-; GFX942-NEXT: s_bitcmp1_b32 s60, 0
-; GFX942-NEXT: s_bitset0_b32 s60, 0
-; GFX942-NEXT: s_mov_b32 s59, s60
+; GFX942-NEXT: s_addc_u32 s59, s32, 16
+; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX942-NEXT: s_bitcmp1_b32 s59, 0
+; GFX942-NEXT: s_bitset0_b32 s59, 0
+; GFX942-NEXT: s_mov_b32 s54, s59
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
+; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v21, 16
; GFX942-NEXT: v_readlane_b32 s54, v21, 15
@@ -862,10 +872,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_lshrrev_b32_e64 v22, 5, s32
+; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 16, v22
-; GFX10_1-NEXT: v_readfirstlane_b32 s59, v22
+; GFX10_1-NEXT: v_readfirstlane_b32 s54, v22
; GFX10_1-NEXT: ;;#ASMSTART
-; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
+; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v21, 16
; GFX10_1-NEXT: v_readlane_b32 s54, v21, 15
@@ -921,10 +932,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_lshrrev_b32_e64 v22, 5, s32
+; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 16, v22
-; GFX10_3-NEXT: v_readfirstlane_b32 s59, v22
+; GFX10_3-NEXT: v_readfirstlane_b32 s54, v22
; GFX10_3-NEXT: ;;#ASMSTART
-; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
+; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v21, 16
; GFX10_3-NEXT: v_readlane_b32 s54, v21, 15
@@ -978,13 +990,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_addc_u32 s60, s32, 16
+; GFX11-NEXT: s_addc_u32 s59, s32, 16
+; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_bitcmp1_b32 s60, 0
-; GFX11-NEXT: s_bitset0_b32 s60, 0
-; GFX11-NEXT: s_mov_b32 s59, s60
+; GFX11-NEXT: s_bitcmp1_b32 s59, 0
+; GFX11-NEXT: s_bitset0_b32 s59, 0
+; GFX11-NEXT: s_mov_b32 s54, s59
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
+; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v21, 16
; GFX11-NEXT: v_readlane_b32 s54, v21, 15
@@ -1042,9 +1055,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: s_mov_b32 s59, s32
+; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX12-NEXT: s_mov_b32 s54, s32
; GFX12-NEXT: ;;#ASMSTART
-; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
+; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_readlane_b32 s55, v21, 16
@@ -1091,7 +1105,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; scc is unavailable since it is live in
call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10",
- "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:20]},{vcc},{s59},{scc}"(
+ "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:20]},{vcc},{s54},{scc}"(
<16 x i32> %s0,
<16 x i32> %s1,
<16 x i32> %s2,
@@ -1151,9 +1165,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX7-NEXT: ;;#ASMEND
-; GFX7-NEXT: v_readlane_b32 s59, v22, 0
+; GFX7-NEXT: v_readlane_b32 s54, v22, 0
; GFX7-NEXT: ;;#ASMSTART
-; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
+; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: v_readlane_b32 s55, v23, 16
; GFX7-NEXT: v_readlane_b32 s54, v23, 15
@@ -1188,58 +1202,66 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-NEXT: s_add_i32 s6, s32, 0x201100
; GFX8-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: v_writelane_b32 v22, s30, 0
-; GFX8-NEXT: v_writelane_b32 v22, s31, 1
-; GFX8-NEXT: v_writelane_b32 v22, s33, 2
-; GFX8-NEXT: v_writelane_b32 v22, s34, 3
-; GFX8-NEXT: v_writelane_b32 v22, s35, 4
-; GFX8-NEXT: v_writelane_b32 v22, s36, 5
-; GFX8-NEXT: v_writelane_b32 v22, s37, 6
-; GFX8-NEXT: v_writelane_b32 v22, s38, 7
-; GFX8-NEXT: v_writelane_b32 v22, s39, 8
-; GFX8-NEXT: v_writelane_b32 v22, s48, 9
-; GFX8-NEXT: v_writelane_b32 v22, s49, 10
-; GFX8-NEXT: v_writelane_b32 v22, s50, 11
-; GFX8-NEXT: v_writelane_b32 v22, s51, 12
-; GFX8-NEXT: v_writelane_b32 v22, s52, 13
-; GFX8-NEXT: s_lshr_b32 s4, s32, 6
-; GFX8-NEXT: v_writelane_b32 v22, s53, 14
+; GFX8-NEXT: v_writelane_b32 v23, s30, 0
+; GFX8-NEXT: v_writelane_b32 v23, s31, 1
+; GFX8-NEXT: v_writelane_b32 v23, s33, 2
+; GFX8-NEXT: v_writelane_b32 v23, s34, 3
+; GFX8-NEXT: v_writelane_b32 v23, s35, 4
+; GFX8-NEXT: v_writelane_b32 v23, s36, 5
+; GFX8-NEXT: v_writelane_b32 v23, s37, 6
+; GFX8-NEXT: v_writelane_b32 v23, s38, 7
+; GFX8-NEXT: v_writelane_b32 v23, s39, 8
+; GFX8-NEXT: v_writelane_b32 v23, s48, 9
+; GFX8-NEXT: v_writelane_b32 v23, s49, 10
+; GFX8-NEXT: v_writelane_b32 v23, s50, 11
+; GFX8-NEXT: v_writelane_b32 v23, s51, 12
+; GFX8-NEXT: v_writelane_b32 v23, s52, 13
+; GFX8-NEXT: s_lshr_b32 s5, s32, 6
+; GFX8-NEXT: v_writelane_b32 v23, s53, 14
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
-; GFX8-NEXT: s_add_i32 s59, s4, 0x4240
-; GFX8-NEXT: v_writelane_b32 v22, s54, 15
+; GFX8-NEXT: s_add_i32 s4, s5, 0x4240
+; GFX8-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GFX8-NEXT: v_writelane_b32 v23, s54, 15
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-NEXT: v_writelane_b32 v22, s4, 0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX8-NEXT: v_writelane_b32 v22, s55, 16
+; GFX8-NEXT: v_writelane_b32 v23, s55, 16
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX8-NEXT: v_readlane_b32 s54, v22, 0
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
+; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: v_readlane_b32 s55, v22, 16
-; GFX8-NEXT: v_readlane_b32 s54, v22, 15
-; GFX8-NEXT: v_readlane_b32 s53, v22, 14
-; GFX8-NEXT: v_readlane_b32 s52, v22, 13
-; GFX8-NEXT: v_readlane_b32 s51, v22, 12
-; GFX8-NEXT: v_readlane_b32 s50, v22, 11
-; GFX8-NEXT: v_readlane_b32 s49, v22, 10
-; GFX8-NEXT: v_readlane_b32 s48, v22, 9
-; GFX8-NEXT: v_readlane_b32 s39, v22, 8
-; GFX8-NEXT: v_readlane_b32 s38, v22, 7
-; GFX8-NEXT: v_readlane_b32 s37, v22, 6
-; GFX8-NEXT: v_readlane_b32 s36, v22, 5
-; GFX8-NEXT: v_readlane_b32 s35, v22, 4
-; GFX8-NEXT: v_readlane_b32 s34, v22, 3
-; GFX8-NEXT: v_readlane_b32 s33, v22, 2
-; GFX8-NEXT: v_readlane_b32 s31, v22, 1
-; GFX8-NEXT: v_readlane_b32 s30, v22, 0
+; GFX8-NEXT: v_readlane_b32 s55, v23, 16
+; GFX8-NEXT: v_readlane_b32 s54, v23, 15
+; GFX8-NEXT: v_readlane_b32 s53, v23, 14
+; GFX8-NEXT: v_readlane_b32 s52, v23, 13
+; GFX8-NEXT: v_readlane_b32 s51, v23, 12
+; GFX8-NEXT: v_readlane_b32 s50, v23, 11
+; GFX8-NEXT: v_readlane_b32 s49, v23, 10
+; GFX8-NEXT: v_readlane_b32 s48, v23, 9
+; GFX8-NEXT: v_readlane_b32 s39, v23, 8
+; GFX8-NEXT: v_readlane_b32 s38, v23, 7
+; GFX8-NEXT: v_readlane_b32 s37, v23, 6
+; GFX8-NEXT: v_readlane_b32 s36, v23, 5
+; GFX8-NEXT: v_readlane_b32 s35, v23, 4
+; GFX8-NEXT: v_readlane_b32 s34, v23, 3
+; GFX8-NEXT: v_readlane_b32 s33, v23, 2
+; GFX8-NEXT: v_readlane_b32 s31, v23, 1
+; GFX8-NEXT: v_readlane_b32 s30, v23, 0
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-NEXT: s_add_i32 s6, s32, 0x201100
; GFX8-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -1250,58 +1272,66 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX900-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-NEXT: s_add_i32 s6, s32, 0x201100
; GFX900-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: v_writelane_b32 v22, s30, 0
-; GFX900-NEXT: v_writelane_b32 v22, s31, 1
-; GFX900-NEXT: v_writelane_b32 v22, s33, 2
-; GFX900-NEXT: v_writelane_b32 v22, s34, 3
-; GFX900-NEXT: v_writelane_b32 v22, s35, 4
-; GFX900-NEXT: v_writelane_b32 v22, s36, 5
-; GFX900-NEXT: v_writelane_b32 v22, s37, 6
-; GFX900-NEXT: v_writelane_b32 v22, s38, 7
-; GFX900-NEXT: v_writelane_b32 v22, s39, 8
-; GFX900-NEXT: v_writelane_b32 v22, s48, 9
-; GFX900-NEXT: v_writelane_b32 v22, s49, 10
-; GFX900-NEXT: v_writelane_b32 v22, s50, 11
-; GFX900-NEXT: v_writelane_b32 v22, s51, 12
-; GFX900-NEXT: v_writelane_b32 v22, s52, 13
-; GFX900-NEXT: s_lshr_b32 s4, s32, 6
-; GFX900-NEXT: v_writelane_b32 v22, s53, 14
+; GFX900-NEXT: v_writelane_b32 v23, s30, 0
+; GFX900-NEXT: v_writelane_b32 v23, s31, 1
+; GFX900-NEXT: v_writelane_b32 v23, s33, 2
+; GFX900-NEXT: v_writelane_b32 v23, s34, 3
+; GFX900-NEXT: v_writelane_b32 v23, s35, 4
+; GFX900-NEXT: v_writelane_b32 v23, s36, 5
+; GFX900-NEXT: v_writelane_b32 v23, s37, 6
+; GFX900-NEXT: v_writelane_b32 v23, s38, 7
+; GFX900-NEXT: v_writelane_b32 v23, s39, 8
+; GFX900-NEXT: v_writelane_b32 v23, s48, 9
+; GFX900-NEXT: v_writelane_b32 v23, s49, 10
+; GFX900-NEXT: v_writelane_b32 v23, s50, 11
+; GFX900-NEXT: v_writelane_b32 v23, s51, 12
+; GFX900-NEXT: v_writelane_b32 v23, s52, 13
+; GFX900-NEXT: s_lshr_b32 s5, s32, 6
+; GFX900-NEXT: v_writelane_b32 v23, s53, 14
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
-; GFX900-NEXT: s_add_i32 s59, s4, 0x4240
-; GFX900-NEXT: v_writelane_b32 v22, s54, 15
+; GFX900-NEXT: s_add_i32 s4, s5, 0x4240
+; GFX900-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GFX900-NEXT: v_writelane_b32 v23, s54, 15
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
+; GFX900-NEXT: v_writelane_b32 v22, s4, 0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX900-NEXT: v_writelane_b32 v22, s55, 16
+; GFX900-NEXT: v_writelane_b32 v23, s55, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX900-NEXT: v_readlane_b32 s54, v22, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
+; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s55, v22, 16
-; GFX900-NEXT: v_readlane_b32 s54, v22, 15
-; GFX900-NEXT: v_readlane_b32 s53, v22, 14
-; GFX900-NEXT: v_readlane_b32 s52, v22, 13
-; GFX900-NEXT: v_readlane_b32 s51, v22, 12
-; GFX900-NEXT: v_readlane_b32 s50, v22, 11
-; GFX900-NEXT: v_readlane_b32 s49, v22, 10
-; GFX900-NEXT: v_readlane_b32 s48, v22, 9
-; GFX900-NEXT: v_readlane_b32 s39, v22, 8
-; GFX900-NEXT: v_readlane_b32 s38, v22, 7
-; GFX900-NEXT: v_readlane_b32 s37, v22, 6
-; GFX900-NEXT: v_readlane_b32 s36, v22, 5
-; GFX900-NEXT: v_readlane_b32 s35, v22, 4
-; GFX900-NEXT: v_readlane_b32 s34, v22, 3
-; GFX900-NEXT: v_readlane_b32 s33, v22, 2
-; GFX900-NEXT: v_readlane_b32 s31, v22, 1
-; GFX900-NEXT: v_readlane_b32 s30, v22, 0
+; GFX900-NEXT: v_readlane_b32 s55, v23, 16
+; GFX900-NEXT: v_readlane_b32 s54, v23, 15
+; GFX900-NEXT: v_readlane_b32 s53, v23, 14
+; GFX900-NEXT: v_readlane_b32 s52, v23, 13
+; GFX900-NEXT: v_readlane_b32 s51, v23, 12
+; GFX900-NEXT: v_readlane_b32 s50, v23, 11
+; GFX900-NEXT: v_readlane_b32 s49, v23, 10
+; GFX900-NEXT: v_readlane_b32 s48, v23, 9
+; GFX900-NEXT: v_readlane_b32 s39, v23, 8
+; GFX900-NEXT: v_readlane_b32 s38, v23, 7
+; GFX900-NEXT: v_readlane_b32 s37, v23, 6
+; GFX900-NEXT: v_readlane_b32 s36, v23, 5
+; GFX900-NEXT: v_readlane_b32 s35, v23, 4
+; GFX900-NEXT: v_readlane_b32 s34, v23, 3
+; GFX900-NEXT: v_readlane_b32 s33, v23, 2
+; GFX900-NEXT: v_readlane_b32 s31, v23, 1
+; GFX900-NEXT: v_readlane_b32 s30, v23, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-NEXT: s_add_i32 s6, s32, 0x201100
; GFX900-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_waitcnt vmcnt(0)
@@ -1339,10 +1369,12 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_add_i32 s59, s32, 0x4240
+; GFX942-NEXT: s_add_i32 s58, s32, 0x4240
+; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
+; GFX942-NEXT: s_mov_b32 s54, s58
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
+; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v22, 16
; GFX942-NEXT: v_readlane_b32 s54, v22, 15
@@ -1379,7 +1411,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_1-NEXT: v_writelane_b32 v22, s30, 0
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5
-; GFX10_1-NEXT: s_add_i32 s59, s4, 0x4240
+; GFX10_1-NEXT: s_add_i32 s58, s4, 0x4240
; GFX10_1-NEXT: v_writelane_b32 v22, s31, 1
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
@@ -1404,8 +1436,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX10_1-NEXT: ;;#ASMEND
+; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_1-NEXT: s_mov_b32 s54, s58
; GFX10_1-NEXT: ;;#ASMSTART
-; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
+; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v22, 16
; GFX10_1-NEXT: v_readlane_b32 s54, v22, 15
@@ -1442,7 +1476,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_3-NEXT: v_writelane_b32 v22, s30, 0
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5
-; GFX10_3-NEXT: s_add_i32 s59, s4, 0x4240
+; GFX10_3-NEXT: s_add_i32 s58, s4, 0x4240
; GFX10_3-NEXT: v_writelane_b32 v22, s31, 1
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
@@ -1467,8 +1501,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX10_3-NEXT: ;;#ASMEND
+; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_3-NEXT: s_mov_b32 s54, s58
; GFX10_3-NEXT: ;;#ASMSTART
-; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
+; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v22, 16
; GFX10_3-NEXT: v_readlane_b32 s54, v22, 15
@@ -1503,7 +1539,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_writelane_b32 v22, s30, 0
; GFX11-NEXT: s_add_i32 s0, s32, 64
-; GFX11-NEXT: s_add_i32 s59, s32, 0x4240
+; GFX11-NEXT: s_add_i32 s58, s32, 0x4240
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
; GFX11-NEXT: v_writelane_b32 v22, s31, 1
@@ -1528,8 +1564,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX11-NEXT: s_mov_b32 s54, s58
; GFX11-NEXT: ;;#ASMSTART
-; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
+; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s55, v22, 16
@@ -1568,7 +1606,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: v_writelane_b32 v22, s30, 0
-; GFX12-NEXT: s_add_co_i32 s59, s32, 0x4200
+; GFX12-NEXT: s_add_co_i32 s58, s32, 0x4200
; GFX12-NEXT: v_mov_b32_e32 v0, s32
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
; GFX12-NEXT: ;;#ASMSTART
@@ -1593,10 +1631,12 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX12-NEXT: ;;#ASMEND
+; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 s54, s58
; GFX12-NEXT: ;;#ASMSTART
-; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
+; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_readlane_b32 s55, v22, 16
; GFX12-NEXT: v_readlane_b32 s54, v22, 15
; GFX12-NEXT: v_readlane_b32 s53, v22, 14
@@ -1644,7 +1684,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; scc is unavailable since it is live in
call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9",
- "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{v[0:15]},{v[16:21]},{vcc},{s59},{scc}"(
+ "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{v[0:15]},{v[16:21]},{vcc},{s54},{scc}"(
<16 x i32> %s0,
<16 x i32> %s1,
<16 x i32> %s2,
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
index 79187f51af0d2..f70cd6816a966 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
@@ -44,7 +44,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; scc is unavailable since it is live in
call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10",
- "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s59},{scc}"(
+ "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s64},{scc}"(
<16 x i32> %s0,
<16 x i32> %s1,
<16 x i32> %s2,
More information about the llvm-commits mailing list