[llvm] [AMDGPU] Convert flat scratch SS->SV in FI elimination (PR #166979)

Diana Picus via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 11 01:46:25 PST 2025


================
@@ -0,0 +1,230 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -verify-machineinstrs -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 | FileCheck %s --check-prefix=GFX950
+
+; Ensure we don't crash with: "Cannot scavenge register in FI elimination!"
+define amdgpu_kernel void @issue155902(i64 %arg, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8, i64 %arg9, i64 %arg10, i64 %arg11, i64 %arg12, i64 %arg13, i64 %arg14, i64 %arg15, i64 %arg16, i64 %arg17, i64 %arg18, i64 %arg19, i64 %arg20, i64 %arg21, i64 %arg22, i64 %arg23, i64 %arg24, i64 %arg25, i64 %arg26, i64 %arg27, i64 %arg28, i64 %arg29, i64 %arg30, i64 %arg31, i64 %arg32, i64 %arg33, i64 %arg34, i64 %arg35, i64 %arg36, i64 %arg37, i64 %arg38, i64 %arg39, i64 %arg40, i64 %arg41, i64 %arg42, i64 %arg43, i64 %arg44, i64 %arg45, i64 %arg46, i64 %arg47, i64 %arg48, i64 %arg49) {
+; GFX950-LABEL: issue155902:
+; GFX950:       ; %bb.0: ; %bb
+; GFX950-NEXT:    s_mov_b32 s33, 0x4008
+; GFX950-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; GFX950-NEXT:    s_load_dwordx2 s[30:31], s[6:7], 0x0
+; GFX950-NEXT:    s_load_dwordx2 s[38:39], s[6:7], 0x8
+; GFX950-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x10
+; GFX950-NEXT:    s_load_dwordx2 vcc, s[6:7], 0x18
+; GFX950-NEXT:    s_load_dwordx2 s[98:99], s[6:7], 0x20
+; GFX950-NEXT:    s_load_dwordx2 s[96:97], s[6:7], 0x28
+; GFX950-NEXT:    s_load_dwordx2 s[94:95], s[6:7], 0x30
+; GFX950-NEXT:    s_load_dwordx2 s[92:93], s[6:7], 0x38
+; GFX950-NEXT:    s_load_dwordx2 s[90:91], s[6:7], 0x40
+; GFX950-NEXT:    s_load_dwordx2 s[88:89], s[6:7], 0x48
+; GFX950-NEXT:    s_load_dwordx2 s[86:87], s[6:7], 0x50
+; GFX950-NEXT:    s_load_dwordx2 s[84:85], s[6:7], 0x58
+; GFX950-NEXT:    s_load_dwordx2 s[82:83], s[6:7], 0x60
+; GFX950-NEXT:    s_load_dwordx2 s[80:81], s[6:7], 0x68
+; GFX950-NEXT:    s_load_dwordx2 s[78:79], s[6:7], 0x70
+; GFX950-NEXT:    s_load_dwordx2 s[76:77], s[6:7], 0x78
+; GFX950-NEXT:    s_load_dwordx2 s[74:75], s[6:7], 0x80
+; GFX950-NEXT:    s_load_dwordx2 s[72:73], s[6:7], 0x88
+; GFX950-NEXT:    s_load_dwordx2 s[70:71], s[6:7], 0x90
+; GFX950-NEXT:    s_load_dwordx2 s[68:69], s[6:7], 0x98
+; GFX950-NEXT:    s_load_dwordx2 s[66:67], s[6:7], 0xa0
+; GFX950-NEXT:    s_load_dwordx2 s[64:65], s[6:7], 0xa8
+; GFX950-NEXT:    s_load_dwordx2 s[62:63], s[6:7], 0xb0
+; GFX950-NEXT:    s_load_dwordx2 s[60:61], s[6:7], 0xb8
+; GFX950-NEXT:    s_load_dwordx2 s[58:59], s[6:7], 0xc0
+; GFX950-NEXT:    s_load_dwordx2 s[56:57], s[6:7], 0xc8
+; GFX950-NEXT:    s_load_dwordx2 s[54:55], s[6:7], 0xd0
+; GFX950-NEXT:    s_load_dwordx2 s[52:53], s[6:7], 0xd8
+; GFX950-NEXT:    s_load_dwordx2 s[50:51], s[6:7], 0xe0
+; GFX950-NEXT:    s_load_dwordx2 s[48:49], s[6:7], 0xe8
+; GFX950-NEXT:    s_load_dwordx2 s[46:47], s[6:7], 0xf0
+; GFX950-NEXT:    s_load_dwordx2 s[44:45], s[6:7], 0xf8
+; GFX950-NEXT:    s_load_dwordx2 s[42:43], s[6:7], 0x100
+; GFX950-NEXT:    s_load_dwordx2 s[40:41], s[6:7], 0x108
+; GFX950-NEXT:    s_load_dwordx2 s[34:35], s[6:7], 0x110
+; GFX950-NEXT:    s_load_dwordx2 s[36:37], s[6:7], 0x118
+; GFX950-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x120
+; GFX950-NEXT:    s_load_dwordx2 s[14:15], s[6:7], 0x128
+; GFX950-NEXT:    s_load_dwordx2 s[28:29], s[6:7], 0x130
+; GFX950-NEXT:    s_load_dwordx2 s[26:27], s[6:7], 0x138
+; GFX950-NEXT:    s_load_dwordx2 s[24:25], s[6:7], 0x140
+; GFX950-NEXT:    s_load_dwordx2 s[22:23], s[6:7], 0x148
+; GFX950-NEXT:    s_load_dwordx2 s[20:21], s[6:7], 0x150
+; GFX950-NEXT:    s_load_dwordx2 s[18:19], s[6:7], 0x158
+; GFX950-NEXT:    s_load_dwordx2 s[16:17], s[6:7], 0x160
+; GFX950-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x168
+; GFX950-NEXT:    s_load_dwordx2 s[12:13], s[6:7], 0x170
+; GFX950-NEXT:    s_load_dwordx2 s[10:11], s[6:7], 0x178
+; GFX950-NEXT:    s_load_dwordx2 s[8:9], s[6:7], 0x180
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x188
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], 0
+; GFX950-NEXT:    v_mov_b32_e32 v3, 0x4008
+; GFX950-NEXT:    scratch_store_dwordx2 v3, v[0:1], off
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s33
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:16
----------------
rovka wrote:

Why do these stores manage to use s33, but the first one needs a VGPR offset?

(Also, maybe store some actual values rather than 0; that would make it easier to track where each store comes from. At the moment, as far as I can tell, both v3 and s33 hold 0x4008, so it's hard to tell why we have 2 stores that appear to target the same address.)

https://github.com/llvm/llvm-project/pull/166979


More information about the llvm-commits mailing list