[llvm] [AMDGPU] Fix test failures when expensive checks are enabled (PR #130644)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 10 10:46:22 PDT 2025


================
@@ -369,112 +570,181 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    s_mov_b32 s6, s33
 ; GFX8-NEXT:    s_mov_b32 s33, s32
+; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT:    s_add_i32 s7, s33, 0x101100
+; GFX8-NEXT:    buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill
+; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX8-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-NEXT:    v_writelane_b32 v1, s55, 0
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use alloca0 v0
 ; GFX8-NEXT:    ;;#ASMEND
 ; GFX8-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
-; GFX8-NEXT:    s_movk_i32 s59, 0x4040
+; GFX8-NEXT:    s_movk_i32 s55, 0x4040
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s55, v0
 ; GFX8-NEXT:    s_add_i32 s32, s32, 0x102000
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s59, v0
+; GFX8-NEXT:    v_readfirstlane_b32 s55, v0
 ; GFX8-NEXT:    s_and_b64 s[4:5], 0, exec
-; GFX8-NEXT:    v_readfirstlane_b32 s59, v0
 ; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use s59, scc
+; GFX8-NEXT:    ; use s55, scc
 ; GFX8-NEXT:    ;;#ASMEND
+; GFX8-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX8-NEXT:    s_mov_b32 s32, s33
+; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT:    s_add_i32 s7, s33, 0x101100
+; GFX8-NEXT:    buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
+; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX8-NEXT:    s_mov_b32 s33, s6
+; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    s_mov_b32 s6, s33
 ; GFX900-NEXT:    s_mov_b32 s33, s32
+; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT:    s_add_i32 s7, s33, 0x101100
+; GFX900-NEXT:    buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill
+; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX900-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
 ; GFX900-NEXT:    v_add_u32_e32 v0, 64, v0
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use alloca0 v0
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
-; GFX900-NEXT:    s_add_i32 s32, s32, 0x102000
 ; GFX900-NEXT:    v_add_u32_e32 v0, 0x4040, v0
+; GFX900-NEXT:    s_add_i32 s32, s32, 0x102000
+; GFX900-NEXT:    v_writelane_b32 v1, s55, 0
+; GFX900-NEXT:    v_readfirstlane_b32 s55, v0
 ; GFX900-NEXT:    s_and_b64 s[4:5], 0, exec
-; GFX900-NEXT:    v_readfirstlane_b32 s59, v0
 ; GFX900-NEXT:    ;;#ASMSTART
-; GFX900-NEXT:    ; use s59, scc
+; GFX900-NEXT:    ; use s55, scc
 ; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX900-NEXT:    s_mov_b32 s32, s33
+; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT:    s_add_i32 s7, s33, 0x101100
+; GFX900-NEXT:    buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
+; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX900-NEXT:    s_mov_b32 s33, s6
+; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
 ; GFX942:       ; %bb.0:
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX942-NEXT:    s_mov_b32 s2, s33
 ; GFX942-NEXT:    s_mov_b32 s33, s32
+; GFX942-NEXT:    s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT:    s_add_i32 s3, s33, 0x4044
+; GFX942-NEXT:    scratch_store_dword off, v1, s3 ; 4-byte Folded Spill
+; GFX942-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX942-NEXT:    s_addk_i32 s32, 0x4080
 ; GFX942-NEXT:    s_add_i32 s0, s33, 64
 ; GFX942-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX942-NEXT:    s_and_b64 s[0:1], 0, exec
 ; GFX942-NEXT:    s_addc_u32 s0, s33, 0x4040
 ; GFX942-NEXT:    s_bitcmp1_b32 s0, 0
 ; GFX942-NEXT:    s_bitset0_b32 s0, 0
+; GFX942-NEXT:    v_writelane_b32 v1, s55, 0
+; GFX942-NEXT:    s_mov_b32 s55, s0
 ; GFX942-NEXT:    ;;#ASMSTART
 ; GFX942-NEXT:    ; use alloca0 v0
 ; GFX942-NEXT:    ;;#ASMEND
-; GFX942-NEXT:    s_mov_b32 s59, s0
 ; GFX942-NEXT:    ;;#ASMSTART
-; GFX942-NEXT:    ; use s59, scc
+; GFX942-NEXT:    ; use s55, scc
 ; GFX942-NEXT:    ;;#ASMEND
+; GFX942-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX942-NEXT:    s_mov_b32 s32, s33
+; GFX942-NEXT:    s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT:    s_add_i32 s3, s33, 0x4044
+; GFX942-NEXT:    scratch_load_dword v1, off, s3 ; 4-byte Folded Reload
+; GFX942-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX942-NEXT:    s_mov_b32 s33, s2
+; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
   %alloca0 = alloca [4096 x i32], align 64, addrspace(5)
   %alloca1 = alloca i32, align 4, addrspace(5)
   call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
-  call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1, i32 0)
+  call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1, i32 0)
----------------
shiltian wrote:

`s59` is no longer live-in because it is caller-saved, so I switched to `s55` here. However, I'm not sure why so much spilling is generated.

https://github.com/llvm/llvm-project/pull/130644


More information about the llvm-commits mailing list