[llvm] [AMDGPU] Convert flat scratch SS->SV in FI elimination (PR #166979)

Anshil Gandhi via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 25 11:01:12 PST 2025


https://github.com/gandhi56 updated https://github.com/llvm/llvm-project/pull/166979

>From a287a544e2e86be25a4a8b9fccae2e2d05c1fa0c Mon Sep 17 00:00:00 2001
From: Anshil Gandhi <gandhi21299 at gmail.com>
Date: Fri, 7 Nov 2025 12:24:43 -0500
Subject: [PATCH] [AMDGPU] Allow negative offsets in scratch insts

This patch enables the LocalStackSlotAllocation pass to reuse
base registers in scratch spill/reload instructions.
Furthermore, it resolves issue #155902, as PEI no longer
needs to scavenge an SGPR.
---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |  10 +-
 .../CodeGen/AMDGPU/GlobalISel/flat-scratch.ll |  28 +--
 .../flat-scratch-neg-offset-bug-155902.ll     | 232 ++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll  |   7 +-
 llvm/test/CodeGen/AMDGPU/flat-scratch.ll      |  51 ++--
 .../local-stack-alloc-block-sp-reference.ll   |  27 +-
 6 files changed, 284 insertions(+), 71 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/flat-scratch-neg-offset-bug-155902.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 0b61adf409948..65751f5277818 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -334,12 +334,6 @@ def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug",
   "GFX10 bug where inst_offset is ignored when flat instructions access global memory"
 >;
 
-def FeatureNegativeScratchOffsetBug : SubtargetFeature<"negative-scratch-offset-bug",
-  "NegativeScratchOffsetBug",
-  "true",
-  "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9"
->;
-
 def FeatureNegativeUnalignedScratchOffsetBug : SubtargetFeature<"negative-unaligned-scratch-offset-bug",
   "NegativeUnalignedScratchOffsetBug",
   "true",
@@ -1542,8 +1536,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
    FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
    FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
    FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
-   FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
-   FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
+   FeatureUnalignedDSAccess, FeatureGWS, FeatureDefaultComponentZero,
+   FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
   ]
 >;
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index 2356dad5275c9..f191b5e4fecb5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -3947,9 +3947,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
 ; GFX9-NEXT:    v_add_u32_e32 v0, s3, v0
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-NEXT:    v_add3_u32 v0, s2, v0, -16
+; GFX9-NEXT:    v_add_u32_e32 v0, s2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 15
-; GFX9-NEXT:    scratch_store_dword v0, v1, off
+; GFX9-NEXT:    scratch_store_dword v0, v1, off offset:-16
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -3969,9 +3969,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
 ; GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
 ; GFX942:       ; %bb.0: ; %bb
 ; GFX942-NEXT:    v_add_u32_e32 v0, s1, v0
-; GFX942-NEXT:    v_add3_u32 v0, s0, v0, -16
+; GFX942-NEXT:    v_add_u32_e32 v0, s0, v0
 ; GFX942-NEXT:    v_mov_b32_e32 v1, 15
-; GFX942-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
+; GFX942-NEXT:    scratch_store_dword v0, v1, off offset:-16 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    s_endpgm
 ;
@@ -3996,9 +3996,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
 ; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
 ; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v0, s3, v0
 ; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; UNALIGNED_GFX9-NEXT:    v_add3_u32 v0, s2, v0, -16
+; UNALIGNED_GFX9-NEXT:    v_add_u32_e32 v0, s2, v0
 ; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v1, 15
-; UNALIGNED_GFX9-NEXT:    scratch_store_dword v0, v1, off
+; UNALIGNED_GFX9-NEXT:    scratch_store_dword v0, v1, off offset:-16
 ; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX9-NEXT:    s_endpgm
 ;
@@ -4018,9 +4018,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
 ; UNALIGNED_GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
 ; UNALIGNED_GFX942:       ; %bb.0: ; %bb
 ; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v0, s1, v0
-; UNALIGNED_GFX942-NEXT:    v_add3_u32 v0, s0, v0, -16
+; UNALIGNED_GFX942-NEXT:    v_add_u32_e32 v0, s0, v0
 ; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v1, 15
-; UNALIGNED_GFX942-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
+; UNALIGNED_GFX942-NEXT:    scratch_store_dword v0, v1, off offset:-16 sc0 sc1
 ; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX942-NEXT:    s_endpgm
 ;
@@ -4052,8 +4052,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-NEXT:    s_add_u32 s0, s2, 0xffffffe8
-; GFX9-NEXT:    scratch_load_dword v2, off, s0
+; GFX9-NEXT:    scratch_load_dword v2, off, s2 offset:-24
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
@@ -4071,8 +4070,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
 ;
 ; GFX942-LABEL: sgpr_base_negative_offset:
 ; GFX942:       ; %bb.0: ; %entry
-; GFX942-NEXT:    s_add_u32 s0, s0, 0xffffffe8
-; GFX942-NEXT:    scratch_load_dword v2, off, s0
+; GFX942-NEXT:    scratch_load_dword v2, off, s0 offset:-24
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX942-NEXT:    s_endpgm
@@ -4095,8 +4093,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
 ; UNALIGNED_GFX9:       ; %bb.0: ; %entry
 ; UNALIGNED_GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
 ; UNALIGNED_GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; UNALIGNED_GFX9-NEXT:    s_add_u32 s0, s2, 0xffffffe8
-; UNALIGNED_GFX9-NEXT:    scratch_load_dword v2, off, s0
+; UNALIGNED_GFX9-NEXT:    scratch_load_dword v2, off, s2 offset:-24
 ; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; UNALIGNED_GFX9-NEXT:    s_endpgm
@@ -4114,8 +4111,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
 ;
 ; UNALIGNED_GFX942-LABEL: sgpr_base_negative_offset:
 ; UNALIGNED_GFX942:       ; %bb.0: ; %entry
-; UNALIGNED_GFX942-NEXT:    s_add_u32 s0, s0, 0xffffffe8
-; UNALIGNED_GFX942-NEXT:    scratch_load_dword v2, off, s0
+; UNALIGNED_GFX942-NEXT:    scratch_load_dword v2, off, s0 offset:-24
 ; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX942-NEXT:    global_store_dword v[0:1], v2, off
 ; UNALIGNED_GFX942-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-neg-offset-bug-155902.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-neg-offset-bug-155902.ll
new file mode 100644
index 0000000000000..93ae5f3124e79
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-neg-offset-bug-155902.ll
@@ -0,0 +1,232 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -verify-machineinstrs -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 | FileCheck %s --check-prefix=GFX950
+
+; Ensure we don't crash with: "Cannot scavenge register in FI elimination!"
+define amdgpu_kernel void @issue155902(i64 %arg, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8, i64 %arg9, i64 %arg10, i64 %arg11, i64 %arg12, i64 %arg13, i64 %arg14, i64 %arg15, i64 %arg16, i64 %arg17, i64 %arg18, i64 %arg19, i64 %arg20, i64 %arg21, i64 %arg22, i64 %arg23, i64 %arg24, i64 %arg25, i64 %arg26, i64 %arg27, i64 %arg28, i64 %arg29, i64 %arg30, i64 %arg31, i64 %arg32, i64 %arg33, i64 %arg34, i64 %arg35, i64 %arg36, i64 %arg37, i64 %arg38, i64 %arg39, i64 %arg40, i64 %arg41, i64 %arg42, i64 %arg43, i64 %arg44, i64 %arg45, i64 %arg46, i64 %arg47, i64 %arg48, i64 %arg49) {
+; GFX950-LABEL: issue155902:
+; GFX950:       ; %bb.0: ; %bb
+; GFX950-NEXT:    s_mov_b32 s0, 8
+; GFX950-NEXT:    s_mov_b32 s1, 0x4008
+; GFX950-NEXT:    s_add_i32 s33, s0, s1
+; GFX950-NEXT:    s_mov_b64 s[2:3], s[4:5]
+; GFX950-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX950-NEXT:    s_load_dwordx2 vcc, s[2:3], 0x8
+; GFX950-NEXT:    s_load_dwordx2 s[98:99], s[2:3], 0x10
+; GFX950-NEXT:    s_load_dwordx2 s[96:97], s[2:3], 0x18
+; GFX950-NEXT:    s_load_dwordx2 s[94:95], s[2:3], 0x20
+; GFX950-NEXT:    s_load_dwordx2 s[92:93], s[2:3], 0x28
+; GFX950-NEXT:    s_load_dwordx2 s[90:91], s[2:3], 0x30
+; GFX950-NEXT:    s_load_dwordx2 s[88:89], s[2:3], 0x38
+; GFX950-NEXT:    s_load_dwordx2 s[86:87], s[2:3], 0x40
+; GFX950-NEXT:    s_load_dwordx2 s[84:85], s[2:3], 0x48
+; GFX950-NEXT:    s_load_dwordx2 s[82:83], s[2:3], 0x50
+; GFX950-NEXT:    s_load_dwordx2 s[80:81], s[2:3], 0x58
+; GFX950-NEXT:    s_load_dwordx2 s[78:79], s[2:3], 0x60
+; GFX950-NEXT:    s_load_dwordx2 s[76:77], s[2:3], 0x68
+; GFX950-NEXT:    s_load_dwordx2 s[74:75], s[2:3], 0x70
+; GFX950-NEXT:    s_load_dwordx2 s[72:73], s[2:3], 0x78
+; GFX950-NEXT:    s_load_dwordx2 s[70:71], s[2:3], 0x80
+; GFX950-NEXT:    s_load_dwordx2 s[68:69], s[2:3], 0x88
+; GFX950-NEXT:    s_load_dwordx2 s[66:67], s[2:3], 0x90
+; GFX950-NEXT:    s_load_dwordx2 s[64:65], s[2:3], 0x98
+; GFX950-NEXT:    s_load_dwordx2 s[62:63], s[2:3], 0xa0
+; GFX950-NEXT:    s_load_dwordx2 s[60:61], s[2:3], 0xa8
+; GFX950-NEXT:    s_load_dwordx2 s[58:59], s[2:3], 0xb0
+; GFX950-NEXT:    s_load_dwordx2 s[56:57], s[2:3], 0xb8
+; GFX950-NEXT:    s_load_dwordx2 s[54:55], s[2:3], 0xc0
+; GFX950-NEXT:    s_load_dwordx2 s[52:53], s[2:3], 0xc8
+; GFX950-NEXT:    s_load_dwordx2 s[50:51], s[2:3], 0xd0
+; GFX950-NEXT:    s_load_dwordx2 s[48:49], s[2:3], 0xd8
+; GFX950-NEXT:    s_load_dwordx2 s[46:47], s[2:3], 0xe0
+; GFX950-NEXT:    s_load_dwordx2 s[44:45], s[2:3], 0xe8
+; GFX950-NEXT:    s_load_dwordx2 s[42:43], s[2:3], 0xf0
+; GFX950-NEXT:    s_load_dwordx2 s[40:41], s[2:3], 0xf8
+; GFX950-NEXT:    s_load_dwordx2 s[38:39], s[2:3], 0x100
+; GFX950-NEXT:    s_load_dwordx2 s[36:37], s[2:3], 0x108
+; GFX950-NEXT:    s_load_dwordx2 s[34:35], s[2:3], 0x110
+; GFX950-NEXT:    s_load_dwordx2 s[30:31], s[2:3], 0x118
+; GFX950-NEXT:    s_load_dwordx2 s[28:29], s[2:3], 0x120
+; GFX950-NEXT:    s_load_dwordx2 s[26:27], s[2:3], 0x128
+; GFX950-NEXT:    s_load_dwordx2 s[24:25], s[2:3], 0x130
+; GFX950-NEXT:    s_load_dwordx2 s[22:23], s[2:3], 0x138
+; GFX950-NEXT:    s_load_dwordx2 s[20:21], s[2:3], 0x140
+; GFX950-NEXT:    s_load_dwordx2 s[18:19], s[2:3], 0x148
+; GFX950-NEXT:    s_load_dwordx2 s[16:17], s[2:3], 0x150
+; GFX950-NEXT:    s_load_dwordx2 s[14:15], s[2:3], 0x158
+; GFX950-NEXT:    s_load_dwordx2 s[12:13], s[2:3], 0x160
+; GFX950-NEXT:    s_load_dwordx2 s[10:11], s[2:3], 0x168
+; GFX950-NEXT:    s_load_dwordx2 s[8:9], s[2:3], 0x170
+; GFX950-NEXT:    s_load_dwordx2 s[6:7], s[2:3], 0x178
+; GFX950-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x180
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x188
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], 0
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s33
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:-8
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], 0x384
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s33 offset:8
+; GFX950-NEXT:    s_mov_b32 s33, 0
+; GFX950-NEXT:    ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
+; GFX950-NEXT:    v_writelane_b32 v2, s33, 0
+; GFX950-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GFX950-NEXT:    v_readlane_b32 s0, v2, 0
+; GFX950-NEXT:    s_nop 4
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], vcc
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[98:99]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[96:97]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[94:95]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[92:93]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[90:91]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[88:89]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[86:87]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[84:85]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[82:83]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[80:81]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[78:79]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[76:77]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[74:75]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[72:73]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[70:71]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[68:69]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[66:67]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[64:65]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[62:63]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[60:61]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[58:59]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[56:57]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[54:55]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[52:53]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[50:51]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[48:49]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[46:47]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[44:45]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[42:43]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[40:41]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[38:39]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[36:37]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[34:35]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[30:31]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[28:29]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[26:27]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[24:25]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[22:23]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[20:21]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[18:19]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[16:17]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[14:15]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[12:13]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[10:11]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[4:5]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
+; GFX950-NEXT:    scratch_store_dwordx2 off, v[0:1], s0
+; GFX950-NEXT:    s_endpgm
+bb:
+  %alloca.big = alloca [4096 x i32], align 4, addrspace(5)
+  %alloca304 = alloca [2 x i64], align 8, addrspace(5)
+  %alloca307 = alloca i64, align 8, addrspace(5)
+  store [2 x i64] zeroinitializer, ptr addrspace(5) %alloca304, align 8
+  store i64 900, ptr addrspace(5) %alloca307, align 8
+  store i64 %arg, ptr addrspace(5) null, align 8
+  store i64 %arg1, ptr addrspace(5) null, align 8
+  store i64 %arg2, ptr addrspace(5) null, align 8
+  store i64 %arg3, ptr addrspace(5) null, align 8
+  store i64 %arg4, ptr addrspace(5) null, align 8
+  store i64 %arg5, ptr addrspace(5) null, align 8
+  store i64 %arg6, ptr addrspace(5) null, align 8
+  store i64 %arg7, ptr addrspace(5) null, align 8
+  store i64 %arg8, ptr addrspace(5) null, align 8
+  store i64 %arg9, ptr addrspace(5) null, align 8
+  store i64 %arg10, ptr addrspace(5) null, align 8
+  store i64 %arg11, ptr addrspace(5) null, align 8
+  store i64 %arg12, ptr addrspace(5) null, align 8
+  store i64 %arg13, ptr addrspace(5) null, align 8
+  store i64 %arg14, ptr addrspace(5) null, align 8
+  store i64 %arg15, ptr addrspace(5) null, align 8
+  store i64 %arg16, ptr addrspace(5) null, align 8
+  store i64 %arg17, ptr addrspace(5) null, align 8
+  store i64 %arg18, ptr addrspace(5) null, align 8
+  store i64 %arg19, ptr addrspace(5) null, align 8
+  store i64 %arg20, ptr addrspace(5) null, align 8
+  store i64 %arg21, ptr addrspace(5) null, align 8
+  store i64 %arg22, ptr addrspace(5) null, align 8
+  store i64 %arg23, ptr addrspace(5) null, align 8
+  store i64 %arg24, ptr addrspace(5) null, align 8
+  store i64 %arg25, ptr addrspace(5) null, align 8
+  store i64 %arg26, ptr addrspace(5) null, align 8
+  store i64 %arg27, ptr addrspace(5) null, align 8
+  store i64 %arg28, ptr addrspace(5) null, align 8
+  store i64 %arg29, ptr addrspace(5) null, align 8
+  store i64 %arg30, ptr addrspace(5) null, align 8
+  store i64 %arg31, ptr addrspace(5) null, align 8
+  store i64 %arg32, ptr addrspace(5) null, align 8
+  store i64 %arg33, ptr addrspace(5) null, align 8
+  store i64 %arg34, ptr addrspace(5) null, align 8
+  store i64 %arg35, ptr addrspace(5) null, align 8
+  store i64 %arg36, ptr addrspace(5) null, align 8
+  store i64 %arg37, ptr addrspace(5) null, align 8
+  store i64 %arg38, ptr addrspace(5) null, align 8
+  store i64 %arg39, ptr addrspace(5) null, align 8
+  store i64 %arg40, ptr addrspace(5) null, align 8
+  store i64 %arg41, ptr addrspace(5) null, align 8
+  store i64 %arg42, ptr addrspace(5) null, align 8
+  store i64 %arg43, ptr addrspace(5) null, align 8
+  store i64 %arg44, ptr addrspace(5) null, align 8
+  store i64 %arg45, ptr addrspace(5) null, align 8
+  store i64 %arg46, ptr addrspace(5) null, align 8
+  store i64 %arg47, ptr addrspace(5) null, align 8
+  store i64 %arg48, ptr addrspace(5) null, align 8
+  store i64 %arg49, ptr addrspace(5) null, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
index e01cb79382c05..737c811cd9d93 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
@@ -1582,8 +1582,7 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
 ; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, 1
 ; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX942-SDAG-NEXT:    v_add_u32_e32 v0, -1, v0
-; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-SDAG-NEXT:    scratch_store_byte v0, v1, off offset:-1 sc0 sc1
 ; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-SDAG-NEXT:    s_endpgm
 ;
@@ -1593,8 +1592,8 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
 ; GFX942-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX942-GISEL-NEXT:    v_mov_b32_e32 v1, 1
 ; GFX942-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX942-GISEL-NEXT:    v_add3_u32 v0, s0, v0, -1
-; GFX942-GISEL-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-GISEL-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX942-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:-1 sc0 sc1
 ; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-GISEL-NEXT:    s_endpgm
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index 870b679a84d11..0863698dee499 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -4561,11 +4561,10 @@ define void @store_load_i32_negative_unaligned(ptr addrspace(5) nocapture %arg)
 ; GFX9-LABEL: store_load_i32_negative_unaligned:
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_add_u32_e32 v0, -1, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 1
-; GFX9-NEXT:    scratch_store_byte v0, v1, off
+; GFX9-NEXT:    scratch_store_byte v0, v1, off offset:-1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    scratch_load_ubyte v0, v0, off glc
+; GFX9-NEXT:    scratch_load_ubyte v0, v0, off offset:-1 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4632,22 +4631,20 @@ define void @store_load_i32_negative_unaligned(ptr addrspace(5) nocapture %arg)
 ; GFX9-PAL-LABEL: store_load_i32_negative_unaligned:
 ; GFX9-PAL:       ; %bb.0: ; %bb
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-PAL-NEXT:    v_add_u32_e32 v0, -1, v0
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, 1
-; GFX9-PAL-NEXT:    scratch_store_byte v0, v1, off
+; GFX9-PAL-NEXT:    scratch_store_byte v0, v1, off offset:-1
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT:    scratch_load_ubyte v0, v0, off glc
+; GFX9-PAL-NEXT:    scratch_load_ubyte v0, v0, off offset:-1 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX942-LABEL: store_load_i32_negative_unaligned:
 ; GFX942:       ; %bb.0: ; %bb
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_add_u32_e32 v0, -1, v0
 ; GFX942-NEXT:    v_mov_b32_e32 v1, 1
-; GFX942-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-NEXT:    scratch_store_byte v0, v1, off offset:-1 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
-; GFX942-NEXT:    scratch_load_ubyte v0, v0, off sc0 sc1
+; GFX942-NEXT:    scratch_load_ubyte v0, v0, off offset:-1 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4732,11 +4729,11 @@ define void @store_load_i32_large_negative_unaligned(ptr addrspace(5) nocapture
 ; GFX9-LABEL: store_load_i32_large_negative_unaligned:
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_add_u32_e32 v0, 0xffffef7f, v0
+; GFX9-NEXT:    v_add_u32_e32 v0, 0xfffff000, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 1
-; GFX9-NEXT:    scratch_store_byte v0, v1, off
+; GFX9-NEXT:    scratch_store_byte v0, v1, off offset:-129
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    scratch_load_ubyte v0, v0, off glc
+; GFX9-NEXT:    scratch_load_ubyte v0, v0, off offset:-129 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4805,22 +4802,22 @@ define void @store_load_i32_large_negative_unaligned(ptr addrspace(5) nocapture
 ; GFX9-PAL-LABEL: store_load_i32_large_negative_unaligned:
 ; GFX9-PAL:       ; %bb.0: ; %bb
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-PAL-NEXT:    v_add_u32_e32 v0, 0xffffef7f, v0
+; GFX9-PAL-NEXT:    v_add_u32_e32 v0, 0xfffff000, v0
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, 1
-; GFX9-PAL-NEXT:    scratch_store_byte v0, v1, off
+; GFX9-PAL-NEXT:    scratch_store_byte v0, v1, off offset:-129
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT:    scratch_load_ubyte v0, v0, off glc
+; GFX9-PAL-NEXT:    scratch_load_ubyte v0, v0, off offset:-129 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX942-LABEL: store_load_i32_large_negative_unaligned:
 ; GFX942:       ; %bb.0: ; %bb
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT:    v_add_u32_e32 v0, 0xffffef7f, v0
+; GFX942-NEXT:    v_add_u32_e32 v0, 0xfffff000, v0
 ; GFX942-NEXT:    v_mov_b32_e32 v1, 1
-; GFX942-NEXT:    scratch_store_byte v0, v1, off sc0 sc1
+; GFX942-NEXT:    scratch_store_byte v0, v1, off offset:-129 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
-; GFX942-NEXT:    scratch_load_ubyte v0, v0, off sc0 sc1
+; GFX942-NEXT:    scratch_load_ubyte v0, v0, off offset:-129 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5485,9 +5482,8 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
 ; GFX9-NEXT:    s_add_i32 s2, s2, s3
 ; GFX9-NEXT:    v_add_u32_e32 v0, s2, v0
-; GFX9-NEXT:    v_add_u32_e32 v0, -16, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 15
-; GFX9-NEXT:    scratch_store_dword v0, v1, off
+; GFX9-NEXT:    scratch_store_dword v0, v1, off offset:-16
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -5531,8 +5527,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
 ; GFX9-PAL-NEXT:    s_add_i32 s0, s0, s1
 ; GFX9-PAL-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX9-PAL-NEXT:    v_add_u32_e32 v0, -16, v0
-; GFX9-PAL-NEXT:    scratch_store_dword v0, v1, off
+; GFX9-PAL-NEXT:    scratch_store_dword v0, v1, off offset:-16
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
@@ -5540,9 +5535,8 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
 ; GFX942:       ; %bb.0: ; %bb
 ; GFX942-NEXT:    s_add_i32 s0, s0, s1
 ; GFX942-NEXT:    v_add_u32_e32 v0, s0, v0
-; GFX942-NEXT:    v_add_u32_e32 v0, -16, v0
 ; GFX942-NEXT:    v_mov_b32_e32 v1, 15
-; GFX942-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
+; GFX942-NEXT:    scratch_store_dword v0, v1, off offset:-16 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    s_endpgm
 ;
@@ -5591,8 +5585,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-NEXT:    s_addk_i32 s2, 0xffe8
-; GFX9-NEXT:    scratch_load_dword v2, off, s2
+; GFX9-NEXT:    scratch_load_dword v2, off, s2 offset:-24
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:    s_endpgm
@@ -5631,16 +5624,14 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
 ; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-PAL-NEXT:    s_addk_i32 s0, 0xffe8
-; GFX9-PAL-NEXT:    scratch_load_dword v2, off, s0
+; GFX9-PAL-NEXT:    scratch_load_dword v2, off, s0 offset:-24
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-PAL-NEXT:    s_endpgm
 ;
 ; GFX942-LABEL: sgpr_base_negative_offset:
 ; GFX942:       ; %bb.0: ; %entry
-; GFX942-NEXT:    s_addk_i32 s0, 0xffe8
-; GFX942-NEXT:    scratch_load_dword v2, off, s0
+; GFX942-NEXT:    scratch_load_dword v2, off, s0 offset:-24
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX942-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 5f0ca7bc42ae0..b23be8501b97e 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -294,28 +294,29 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 0
-; FLATSCR-NEXT:    s_mov_b32 s0, 0
-; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 offset:1024
+; FLATSCR-NEXT:    s_movk_i32 s1, 0x2000
+; FLATSCR-NEXT:    s_movk_i32 s0, 0x4010
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s1
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_mov_b32 s1, 0
 ; FLATSCR-NEXT:  .LBB2_1: ; %loadstoreloop
 ; FLATSCR-NEXT:    ; =>This Inner Loop Header: Depth=1
-; FLATSCR-NEXT:    s_add_i32 s1, s0, 0x2000
-; FLATSCR-NEXT:    s_add_i32 s0, s0, 1
-; FLATSCR-NEXT:    s_cmpk_lt_u32 s0, 0x2120
-; FLATSCR-NEXT:    scratch_store_byte off, v0, s1
+; FLATSCR-NEXT:    s_add_i32 s2, s1, 0x4000
+; FLATSCR-NEXT:    s_add_i32 s1, s1, 1
+; FLATSCR-NEXT:    s_cmpk_lt_u32 s1, 0x2120
+; FLATSCR-NEXT:    scratch_store_byte off, v0, s2
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    s_cbranch_scc1 .LBB2_1
 ; FLATSCR-NEXT:  ; %bb.2: ; %split
-; FLATSCR-NEXT:    s_movk_i32 s0, 0x1000
-; FLATSCR-NEXT:    s_addk_i32 s0, 0x2000
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[8:9], off, s0 offset:720 glc
+; FLATSCR-NEXT:    s_movk_i32 s1, 0x1000
+; FLATSCR-NEXT:    s_addk_i32 s1, 0x4000
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[8:9], off, s1 offset:720 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s0 offset:704 glc
+; FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s1 offset:704 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    s_movk_i32 s0, 0x2000
-; FLATSCR-NEXT:    scratch_load_dwordx2 v[10:11], off, s0 offset:16 glc
+; FLATSCR-NEXT:    scratch_load_dwordx2 v[10:11], off, s0 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    scratch_load_dwordx4 v[4:7], off, s0 glc
+; FLATSCR-NEXT:    scratch_load_dwordx4 v[4:7], off, s0 offset:-16 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; FLATSCR-NEXT:    v_mov_b32_e32 v12, 0



More information about the llvm-commits mailing list