[llvm] [AMDGPU] Make chain functions receive a stack pointer (PR #184616)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 10 23:13:00 PDT 2026
================
@@ -90,27 +153,27 @@ define amdgpu_cs_chain void @test_alloca_var(i32 %count) {
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_lshl_add_u32 v0, v8, 2, 15
-; GFX12-NEXT: s_mov_b32 s1, exec_lo
-; GFX12-NEXT: s_mov_b32 s0, 0
-; GFX12-NEXT: s_mov_b32 s32, 16
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: s_mov_b64 s[0:1], exec
+; GFX12-NEXT: s_mov_b32 s2, 0
+; GFX12-NEXT: s_mov_b32 s33, s32
+; GFX12-NEXT: s_add_co_i32 s32, s32, 16
; GFX12-NEXT: v_and_b32_e32 v1, -16, v0
; GFX12-NEXT: v_mov_b32_e32 v0, 0
-; GFX12-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; GFX12-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
-; GFX12-NEXT: s_ctz_i32_b32 s2, s1
+; GFX12-NEXT: s_ctz_i32_b64 s3, s[0:1]
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
-; GFX12-NEXT: v_readlane_b32 s3, v1, s2
-; GFX12-NEXT: s_bitset0_b32 s1, s2
-; GFX12-NEXT: s_max_u32 s0, s0, s3
+; GFX12-NEXT: v_readlane_b32 s4, v1, s3
+; GFX12-NEXT: s_bitset0_b64 s[0:1], s3
+; GFX12-NEXT: s_max_u32 s2, s2, s4
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
-; GFX12-NEXT: s_cmp_lg_u32 s1, 0
-; GFX12-NEXT: s_cbranch_scc1 .LBB2_1
+; GFX12-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX12-NEXT: s_cbranch_scc1 .LBB3_1
; GFX12-NEXT: ; %bb.2:
-; GFX12-NEXT: s_mov_b32 s1, s32
+; GFX12-NEXT: s_mov_b32 s0, s32
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
-; GFX12-NEXT: v_lshl_add_u32 v1, s0, 5, s1
-; GFX12-NEXT: scratch_store_b32 off, v0, s1
+; GFX12-NEXT: v_lshl_add_u32 v1, s2, 6, s0
----------------
easyonaadit wrote:
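I don't quite understand this change. Why is it now left-shifting by 6 instead of 5? Together with the switch from the 32-bit `exec_lo` operations to the 64-bit `exec` ones, it looks as if the wave size for this test has changed from 32 to 64.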
https://github.com/llvm/llvm-project/pull/184616
More information about the llvm-commits mailing list