[llvm] [AMDGPU] Fix test failures when expensive checks are enabled (PR #130644)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 10 10:46:22 PDT 2025
================
@@ -369,112 +570,181 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s6, s33
; GFX8-NEXT: s_mov_b32 s33, s32
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s7, s33, 0x101100
+; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-NEXT: v_writelane_b32 v1, s55, 0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33
-; GFX8-NEXT: s_movk_i32 s59, 0x4040
+; GFX8-NEXT: s_movk_i32 s55, 0x4040
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s55, v0
; GFX8-NEXT: s_add_i32 s32, s32, 0x102000
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0
+; GFX8-NEXT: v_readfirstlane_b32 s55, v0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use s59, scc
+; GFX8-NEXT: ; use s55, scc
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_readlane_b32 s55, v1, 0
; GFX8-NEXT: s_mov_b32 s32, s33
+; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT: s_add_i32 s7, s33, 0x101100
+; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
+; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_mov_b32 s33, s6
+; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s6, s33
; GFX900-NEXT: s_mov_b32 s33, s32
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s7, s33, 0x101100
+; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33
-; GFX900-NEXT: s_add_i32 s32, s32, 0x102000
; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0
+; GFX900-NEXT: s_add_i32 s32, s32, 0x102000
+; GFX900-NEXT: v_writelane_b32 v1, s55, 0
+; GFX900-NEXT: v_readfirstlane_b32 s55, v0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX900-NEXT: v_readfirstlane_b32 s59, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s59, scc
+; GFX900-NEXT: ; use s55, scc
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_readlane_b32 s55, v1, 0
; GFX900-NEXT: s_mov_b32 s32, s33
+; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT: s_add_i32 s7, s33, 0x101100
+; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
+; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_mov_b32 s33, s6
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, s33
; GFX942-NEXT: s_mov_b32 s33, s32
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s3, s33, 0x4044
+; GFX942-NEXT: scratch_store_dword off, v1, s3 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_addk_i32 s32, 0x4080
; GFX942-NEXT: s_add_i32 s0, s33, 64
; GFX942-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-NEXT: s_addc_u32 s0, s33, 0x4040
; GFX942-NEXT: s_bitcmp1_b32 s0, 0
; GFX942-NEXT: s_bitset0_b32 s0, 0
+; GFX942-NEXT: v_writelane_b32 v1, s55, 0
+; GFX942-NEXT: s_mov_b32 s55, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s59, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s59, scc
+; GFX942-NEXT: ; use s55, scc
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s55, v1, 0
; GFX942-NEXT: s_mov_b32 s32, s33
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: s_add_i32 s3, s33, 0x4044
+; GFX942-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_mov_b32 s33, s2
+; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
- call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1, i32 0)
+ call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1, i32 0)
----------------
shiltian wrote:
`s59` is no longer live-in because it is caller-saved. I switched to `s55` here, but I'm not sure why so much spilling is generated.
https://github.com/llvm/llvm-project/pull/130644
More information about the llvm-commits
mailing list