[llvm] [AMDGPU] Allow negative offsets in scratch insts (PR #166979)
Diana Picus via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 26 03:23:39 PST 2025
================
@@ -0,0 +1,230 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -verify-machineinstrs -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 | FileCheck %s --check-prefix=GFX950
+
+; Ensure we don't crash with: "Cannot scavenge register in FI elimination!"
+define amdgpu_kernel void @issue155902(i64 %arg, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8, i64 %arg9, i64 %arg10, i64 %arg11, i64 %arg12, i64 %arg13, i64 %arg14, i64 %arg15, i64 %arg16, i64 %arg17, i64 %arg18, i64 %arg19, i64 %arg20, i64 %arg21, i64 %arg22, i64 %arg23, i64 %arg24, i64 %arg25, i64 %arg26, i64 %arg27, i64 %arg28, i64 %arg29, i64 %arg30, i64 %arg31, i64 %arg32, i64 %arg33, i64 %arg34, i64 %arg35, i64 %arg36, i64 %arg37, i64 %arg38, i64 %arg39, i64 %arg40, i64 %arg41, i64 %arg42, i64 %arg43, i64 %arg44, i64 %arg45, i64 %arg46, i64 %arg47, i64 %arg48, i64 %arg49) {
+; GFX950-LABEL: issue155902:
+; GFX950: ; %bb.0: ; %bb
+; GFX950-NEXT: s_mov_b32 s33, 0x4008
+; GFX950-NEXT: s_mov_b64 s[6:7], s[4:5]
+; GFX950-NEXT: s_load_dwordx2 s[30:31], s[6:7], 0x0
+; GFX950-NEXT: s_load_dwordx2 s[38:39], s[6:7], 0x8
+; GFX950-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x10
+; GFX950-NEXT: s_load_dwordx2 vcc, s[6:7], 0x18
+; GFX950-NEXT: s_load_dwordx2 s[98:99], s[6:7], 0x20
+; GFX950-NEXT: s_load_dwordx2 s[96:97], s[6:7], 0x28
+; GFX950-NEXT: s_load_dwordx2 s[94:95], s[6:7], 0x30
+; GFX950-NEXT: s_load_dwordx2 s[92:93], s[6:7], 0x38
+; GFX950-NEXT: s_load_dwordx2 s[90:91], s[6:7], 0x40
+; GFX950-NEXT: s_load_dwordx2 s[88:89], s[6:7], 0x48
+; GFX950-NEXT: s_load_dwordx2 s[86:87], s[6:7], 0x50
+; GFX950-NEXT: s_load_dwordx2 s[84:85], s[6:7], 0x58
+; GFX950-NEXT: s_load_dwordx2 s[82:83], s[6:7], 0x60
+; GFX950-NEXT: s_load_dwordx2 s[80:81], s[6:7], 0x68
+; GFX950-NEXT: s_load_dwordx2 s[78:79], s[6:7], 0x70
+; GFX950-NEXT: s_load_dwordx2 s[76:77], s[6:7], 0x78
+; GFX950-NEXT: s_load_dwordx2 s[74:75], s[6:7], 0x80
+; GFX950-NEXT: s_load_dwordx2 s[72:73], s[6:7], 0x88
+; GFX950-NEXT: s_load_dwordx2 s[70:71], s[6:7], 0x90
+; GFX950-NEXT: s_load_dwordx2 s[68:69], s[6:7], 0x98
+; GFX950-NEXT: s_load_dwordx2 s[66:67], s[6:7], 0xa0
+; GFX950-NEXT: s_load_dwordx2 s[64:65], s[6:7], 0xa8
+; GFX950-NEXT: s_load_dwordx2 s[62:63], s[6:7], 0xb0
+; GFX950-NEXT: s_load_dwordx2 s[60:61], s[6:7], 0xb8
+; GFX950-NEXT: s_load_dwordx2 s[58:59], s[6:7], 0xc0
+; GFX950-NEXT: s_load_dwordx2 s[56:57], s[6:7], 0xc8
+; GFX950-NEXT: s_load_dwordx2 s[54:55], s[6:7], 0xd0
+; GFX950-NEXT: s_load_dwordx2 s[52:53], s[6:7], 0xd8
+; GFX950-NEXT: s_load_dwordx2 s[50:51], s[6:7], 0xe0
+; GFX950-NEXT: s_load_dwordx2 s[48:49], s[6:7], 0xe8
+; GFX950-NEXT: s_load_dwordx2 s[46:47], s[6:7], 0xf0
+; GFX950-NEXT: s_load_dwordx2 s[44:45], s[6:7], 0xf8
+; GFX950-NEXT: s_load_dwordx2 s[42:43], s[6:7], 0x100
+; GFX950-NEXT: s_load_dwordx2 s[40:41], s[6:7], 0x108
+; GFX950-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x110
+; GFX950-NEXT: s_load_dwordx2 s[36:37], s[6:7], 0x118
+; GFX950-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x120
+; GFX950-NEXT: s_load_dwordx2 s[14:15], s[6:7], 0x128
+; GFX950-NEXT: s_load_dwordx2 s[28:29], s[6:7], 0x130
+; GFX950-NEXT: s_load_dwordx2 s[26:27], s[6:7], 0x138
+; GFX950-NEXT: s_load_dwordx2 s[24:25], s[6:7], 0x140
+; GFX950-NEXT: s_load_dwordx2 s[22:23], s[6:7], 0x148
+; GFX950-NEXT: s_load_dwordx2 s[20:21], s[6:7], 0x150
+; GFX950-NEXT: s_load_dwordx2 s[18:19], s[6:7], 0x158
+; GFX950-NEXT: s_load_dwordx2 s[16:17], s[6:7], 0x160
+; GFX950-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x168
+; GFX950-NEXT: s_load_dwordx2 s[12:13], s[6:7], 0x170
+; GFX950-NEXT: s_load_dwordx2 s[10:11], s[6:7], 0x178
+; GFX950-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x180
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x188
+; GFX950-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX950-NEXT: v_mov_b32_e32 v3, 0x4008
+; GFX950-NEXT: scratch_store_dwordx2 v3, v[0:1], off
+; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s33
+; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:16
----------------
rovka wrote:
I'm wondering if we could get this to work regardless of negative offset support by sorting by the offset of the access rather than by the LocalOffset. Can you try adding another member to FrameRef containing LocalOffset + InstrOffset and sorting by that in [localstackalloc](https://github.com/llvm/llvm-project/blob/3036de77239f0b29b3619ff6e468ccf5845c7e91/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp#L358)?
https://github.com/llvm/llvm-project/pull/166979
More information about the llvm-commits mailing list