[llvm] [AMDGPU] Initialize FrameOffsetReg for amdgpu_cs_chain functions (PR #165518)
Diana Picus via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 5 01:07:59 PST 2025
================
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 -O0 -verify-machineinstrs < %s | FileCheck %s
+
+define amdgpu_cs_chain void @indirect(ptr %callee) {
+; CHECK-LABEL: indirect:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_mov_b32 s32, 16
+; CHECK-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; CHECK-NEXT: scratch_store_dword off, v40, off ; 4-byte Folded Spill
+; CHECK-NEXT: scratch_store_dword off, v41, off offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT: s_mov_b64 exec, s[0:1]
+; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
+; CHECK-NEXT: ; implicit-def: $sgpr10_sgpr11
+; CHECK-NEXT: ; implicit-def: $sgpr0
+; CHECK-NEXT: s_mov_b32 s1, 0
+; CHECK-NEXT: ; implicit-def: $vgpr40 : SGPR spill to VGPR lane
+; CHECK-NEXT: v_writelane_b32 v40, s1, 0
+; CHECK-NEXT: v_mov_b32_e32 v0, s1
+; CHECK-NEXT: v_mov_b32_e32 v1, s1
+; CHECK-NEXT: s_mov_b64 s[4:5], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[8:9], 36
+; CHECK-NEXT: s_mov_b32 s12, s0
+; CHECK-NEXT: s_mov_b32 s13, s0
+; CHECK-NEXT: s_mov_b32 s14, s0
+; CHECK-NEXT: s_mov_b32 s15, s0
+; CHECK-NEXT: v_mov_b32_e32 v31, s0
+; CHECK-NEXT: s_getpc_b64 s[0:1]
+; CHECK-NEXT: s_add_u32 s0, s0, indirect@gotpcrel32@lo+4
+; CHECK-NEXT: s_addc_u32 s1, s1, indirect@gotpcrel32@hi+12
+; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; CHECK-NEXT: v_readlane_b32 s3, v40, 0
+; CHECK-NEXT: s_nop 1
+; CHECK-NEXT: v_mov_b32_e32 v0, s3
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: v_readfirstlane_b32 s0, v0
+; CHECK-NEXT: v_mov_b32_e32 v0, s3
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: v_readfirstlane_b32 s1, v0
+; CHECK-NEXT: v_mov_b32_e32 v0, s3
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: v_readfirstlane_b32 s2, v0
+; CHECK-NEXT: v_mov_b32_e32 v8, s3
+; CHECK-NEXT: s_mov_b32 s4, 0
+; CHECK-NEXT: v_mov_b32_e32 v9, s4
+; CHECK-NEXT: v_mov_b32_e32 v10, s3
+; CHECK-NEXT: v_mov_b32_e32 v11, s3
+; CHECK-NEXT: s_mov_b64 s[4:5], 0
+; CHECK-NEXT: v_readlane_b32 s3, v41, 0
+; CHECK-NEXT: s_xor_saveexec_b64 s[6:7], -1
+; CHECK-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
----------------
rovka wrote:
This isn't correct, since the stores don't use fp_reg, they just use plain offsets. It looks like this is just an oversight where we used the wrong register for the restores. You should check who's inserting these spills and restores and make sure they're handled the same way, rather than force the FP to s33. (I could've sworn I fixed something like this a while ago but I can't find it right now).
https://github.com/llvm/llvm-project/pull/165518
More information about the llvm-commits mailing list