[llvm] [AMDGPU] Convert flat scratch SS->SV in FI elimination (PR #166979)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 24 09:35:14 PST 2025


================
@@ -0,0 +1,230 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -verify-machineinstrs -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 | FileCheck %s --check-prefix=GFX950
+
+; Ensure we don't crash with: "Cannot scavenge register in FI elimination!"
+define amdgpu_kernel void @issue155902(i64 %arg, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8, i64 %arg9, i64 %arg10, i64 %arg11, i64 %arg12, i64 %arg13, i64 %arg14, i64 %arg15, i64 %arg16, i64 %arg17, i64 %arg18, i64 %arg19, i64 %arg20, i64 %arg21, i64 %arg22, i64 %arg23, i64 %arg24, i64 %arg25, i64 %arg26, i64 %arg27, i64 %arg28, i64 %arg29, i64 %arg30, i64 %arg31, i64 %arg32, i64 %arg33, i64 %arg34, i64 %arg35, i64 %arg36, i64 %arg37, i64 %arg38, i64 %arg39, i64 %arg40, i64 %arg41, i64 %arg42, i64 %arg43, i64 %arg44, i64 %arg45, i64 %arg46, i64 %arg47, i64 %arg48, i64 %arg49) {
+; GFX950-LABEL: issue155902:
+; GFX950:       ; %bb.0: ; %bb
+; GFX950-NEXT:    s_mov_b32 s33, 0x4008
+; GFX950-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; GFX950-NEXT:    s_load_dwordx2 s[30:31], s[6:7], 0x0
+; GFX950-NEXT:    s_load_dwordx2 s[38:39], s[6:7], 0x8
+; GFX950-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x10
+; GFX950-NEXT:    s_load_dwordx2 vcc, s[6:7], 0x18
+; GFX950-NEXT:    s_load_dwordx2 s[98:99], s[6:7], 0x20
+; GFX950-NEXT:    s_load_dwordx2 s[96:97], s[6:7], 0x28
+; GFX950-NEXT:    s_load_dwordx2 s[94:95], s[6:7], 0x30
+; GFX950-NEXT:    s_load_dwordx2 s[92:93], s[6:7], 0x38
+; GFX950-NEXT:    s_load_dwordx2 s[90:91], s[6:7], 0x40
+; GFX950-NEXT:    s_load_dwordx2 s[88:89], s[6:7], 0x48
+; GFX950-NEXT:    s_load_dwordx2 s[86:87], s[6:7], 0x50
+; GFX950-NEXT:    s_load_dwordx2 s[84:85], s[6:7], 0x58
+; GFX950-NEXT:    s_load_dwordx2 s[82:83], s[6:7], 0x60
+; GFX950-NEXT:    s_load_dwordx2 s[80:81], s[6:7], 0x68
+; GFX950-NEXT:    s_load_dwordx2 s[78:79], s[6:7], 0x70
+; GFX950-NEXT:    s_load_dwordx2 s[76:77], s[6:7], 0x78
+; GFX950-NEXT:    s_load_dwordx2 s[74:75], s[6:7], 0x80
+; GFX950-NEXT:    s_load_dwordx2 s[72:73], s[6:7], 0x88
+; GFX950-NEXT:    s_load_dwordx2 s[70:71], s[6:7], 0x90
+; GFX950-NEXT:    s_load_dwordx2 s[68:69], s[6:7], 0x98
+; GFX950-NEXT:    s_load_dwordx2 s[66:67], s[6:7], 0xa0
+; GFX950-NEXT:    s_load_dwordx2 s[64:65], s[6:7], 0xa8
+; GFX950-NEXT:    s_load_dwordx2 s[62:63], s[6:7], 0xb0
+; GFX950-NEXT:    s_load_dwordx2 s[60:61], s[6:7], 0xb8
+; GFX950-NEXT:    s_load_dwordx2 s[58:59], s[6:7], 0xc0
+; GFX950-NEXT:    s_load_dwordx2 s[56:57], s[6:7], 0xc8
+; GFX950-NEXT:    s_load_dwordx2 s[54:55], s[6:7], 0xd0
+; GFX950-NEXT:    s_load_dwordx2 s[52:53], s[6:7], 0xd8
+; GFX950-NEXT:    s_load_dwordx2 s[50:51], s[6:7], 0xe0
+; GFX950-NEXT:    s_load_dwordx2 s[48:49], s[6:7], 0xe8
+; GFX950-NEXT:    s_load_dwordx2 s[46:47], s[6:7], 0xf0
+; GFX950-NEXT:    s_load_dwordx2 s[44:45], s[6:7], 0xf8
+; GFX950-NEXT:    s_load_dwordx2 s[42:43], s[6:7], 0x100
+; GFX950-NEXT:    s_load_dwordx2 s[40:41], s[6:7], 0x108
+; GFX950-NEXT:    s_load_dwordx2 s[34:35], s[6:7], 0x110
+; GFX950-NEXT:    s_load_dwordx2 s[36:37], s[6:7], 0x118
+; GFX950-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x120
+; GFX950-NEXT:    s_load_dwordx2 s[14:15], s[6:7], 0x128
+; GFX950-NEXT:    s_load_dwordx2 s[28:29], s[6:7], 0x130
+; GFX950-NEXT:    s_load_dwordx2 s[26:27], s[6:7], 0x138
+; GFX950-NEXT:    s_load_dwordx2 s[24:25], s[6:7], 0x140
+; GFX950-NEXT:    s_load_dwordx2 s[22:23], s[6:7], 0x148
+; GFX950-NEXT:    s_load_dwordx2 s[20:21], s[6:7], 0x150
+; GFX950-NEXT:    s_load_dwordx2 s[18:19], s[6:7], 0x158
+; GFX950-NEXT:    s_load_dwordx2 s[16:17], s[6:7], 0x160
+; GFX950-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x168
+; GFX950-NEXT:    s_load_dwordx2 s[12:13], s[6:7], 0x170
+; GFX950-NEXT:    s_load_dwordx2 s[10:11], s[6:7], 0x178
+; GFX950-NEXT:    s_load_dwordx2 s[8:9], s[6:7], 0x180
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x188
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], 0
+; GFX950-NEXT:    v_mov_b32_e32 v3, 0x4008
+; GFX950-NEXT:    scratch_store_dwordx2 v3, v[0:1], off
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s33
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:16
----------------
arsenm wrote:

Scratch stores are supposed to support negative offsets. I think there may have been a bug related to them in the first gfx9s. In any case, on modern targets we should be switching the ABI to use negative offsets for accessing local stack slots.

https://github.com/llvm/llvm-project/pull/166979


More information about the llvm-commits mailing list